diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -1020,6 +1020,7 @@ virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef CapturedVars, + ArrayRef CapturedVarsElemTypes, const Expr *IfCond, llvm::Value *NumThreads); /// Emits a critical region. @@ -1998,6 +1999,7 @@ void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef CapturedVars, + ArrayRef CapturedVarsElemTypes, const Expr *IfCond, llvm::Value *NumThreads) override; /// Emits a critical region. diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1263,7 +1263,7 @@ CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, HasCancel, OutlinedHelperName); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc()); + return CGF.GenerateOpenMPCapturedStmtFunctionAggregate(*CS, D.getBeginLoc()); } llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( @@ -2036,11 +2036,11 @@ CGF.EmitBlock(ContBlock, /*IsFinished=*/true); } -void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, - llvm::Function *OutlinedFn, - ArrayRef CapturedVars, - const Expr *IfCond, - llvm::Value *NumThreads) { +void CGOpenMPRuntime::emitParallelCall( + CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, + ArrayRef CapturedVars, + ArrayRef CapturedVarsElemTypes, const Expr *IfCond, + llvm::Value *NumThreads) { if (!CGF.HaveInsertPoint()) return; llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); @@ -12840,12 +12840,11 @@ llvm_unreachable("Not supported in SIMD-only mode"); } -void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, - SourceLocation Loc, - llvm::Function *OutlinedFn, - ArrayRef CapturedVars, - const Expr *IfCond, - llvm::Value *NumThreads) { +void CGOpenMPSIMDRuntime::emitParallelCall( + CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, + ArrayRef CapturedVars, + ArrayRef CapturedVarsElemTypes, const Expr *IfCond, + llvm::Value *NumThreads) { llvm_unreachable("Not supported in SIMD-only mode"); } diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h @@ -263,6 +263,7 @@ void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef CapturedVars, + ArrayRef CapturedVarsElemTypes, const Expr *IfCond, llvm::Value *NumThreads) override; /// Emit an implicit/explicit barrier for OpenMP threads. diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -1517,18 +1517,17 @@ emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); } -void CGOpenMPRuntimeGPU::emitParallelCall(CodeGenFunction &CGF, - SourceLocation Loc, - llvm::Function *OutlinedFn, - ArrayRef CapturedVars, - const Expr *IfCond, - llvm::Value *NumThreads) { +void CGOpenMPRuntimeGPU::emitParallelCall( + CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, + ArrayRef CapturedVars, + ArrayRef CapturedVarsElemTypes, const Expr *IfCond, + llvm::Value *NumThreads) { if (!CGF.HaveInsertPoint()) return; - auto &&ParallelGen = [this, Loc, OutlinedFn, CapturedVars, IfCond, - NumThreads](CodeGenFunction &CGF, - PrePostActionTy &Action) { + auto &&ParallelGen = [this, Loc, OutlinedFn, CapturedVars, + CapturedVarsElemTypes, IfCond, NumThreads]( + CodeGenFunction &CGF, PrePostActionTy &Action) { CGBuilderTy &Bld = CGF.Builder; llvm::Value *NumThreadsVal = NumThreads; llvm::Function *WFn = WrapperFunctionsMap[OutlinedFn]; @@ -1542,25 +1541,49 @@ // TODO: Is that needed? CodeGenFunction::OMPPrivateScope PrivateArgScope(CGF); - Address CapturedVarsAddrs = CGF.CreateDefaultAlignTempAlloca( - llvm::ArrayType::get(CGM.VoidPtrTy, CapturedVars.size()), - "captured_vars_addrs"); - // There's something to share. - if (!CapturedVars.empty()) { - // Prepare for parallel region. Indicate the outlined function. - ASTContext &Ctx = CGF.getContext(); - unsigned Idx = 0; - for (llvm::Value *V : CapturedVars) { - Address Dst = Bld.CreateConstArrayGEP(CapturedVarsAddrs, Idx); - llvm::Value *PtrV; - if (V->getType()->isIntegerTy()) - PtrV = Bld.CreateIntToPtr(V, CGF.VoidPtrTy); - else - PtrV = Bld.CreatePointerBitCastOrAddrSpaceCast(V, CGF.VoidPtrTy); - CGF.EmitStoreOfScalar(PtrV, Dst, /*Volatile=*/false, - Ctx.getPointerType(Ctx.VoidPtrTy)); - ++Idx; - } + assert(CapturedVars.size() == 1 && + "Expected single aggregate argument to outlined function"); + + // Globalize the single aggregate argument, if needed, or use a local + // alloca, or emit null when there are no arguments. + llvm::Value *AggregateV = CapturedVars[0]; + assert(AggregateV->getType()->isPointerTy() && + "Expected pointer type for aggregate argument."); + + assert(CapturedVarsElemTypes.size() == 1 && + "Expected single element in array of types."); + llvm::Type *PtrElemTy = CapturedVarsElemTypes[0]; + auto &DL = CGM.getDataLayout(); + unsigned AllocSize = DL.getTypeAllocSize(PtrElemTy); + + llvm::CallBase *GlobalPtr = nullptr; + llvm::Value *AggregatePtr = nullptr; + + if (AllocSize) { + llvm::AllocaInst *LocalAlloc = + CGF.CreateTempAlloca(PtrElemTy, ".tmp.outlined.agg.arg"); + llvm::Value *LocalPtr = Bld.CreatePointerCast(LocalAlloc, CGF.VoidPtrTy); + GlobalPtr = + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_alloc_shared), + {llvm::ConstantInt::get(CGM.SizeTy, AllocSize)}); + GlobalPtr->addRetAttr(llvm::Attribute::get( + CGM.getLLVMContext(), llvm::Attribute::Alignment, + CGM.getContext().getTargetInfo().getNewAlign() / 8)); + + llvm::Value *AllocArgs[] = {LocalPtr, GlobalPtr}; + AggregatePtr = CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_alloc_aggregate_arg), + AllocArgs); + + llvm::Value *CapturedVarVal = Bld.CreateAlignedLoad( + PtrElemTy, AggregateV, DL.getABITypeAlign(PtrElemTy)); + llvm::Value *AggregatePtrCast = Bld.CreatePointerBitCastOrAddrSpaceCast( + AggregatePtr, PtrElemTy->getPointerTo()); + Bld.CreateDefaultAlignedStore(CapturedVarVal, AggregatePtrCast); + } else { + AggregatePtr = llvm::Constant::getNullValue(OMPBuilder.VoidPtr); } llvm::Value *IfCondVal = nullptr; @@ -1576,21 +1599,29 @@ NumThreadsVal = Bld.CreateZExtOrTrunc(NumThreadsVal, CGF.Int32Ty), assert(IfCondVal && "Expected a value"); + assert(AggregatePtr && "Expected non-null aggregate pointer value"); + // Create the parallel call. llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); - llvm::Value *Args[] = { - RTLoc, - getThreadID(CGF, Loc), - IfCondVal, - NumThreadsVal, - llvm::ConstantInt::get(CGF.Int32Ty, -1), - FnPtr, - ID, - Bld.CreateBitOrPointerCast(CapturedVarsAddrs.getPointer(), - CGF.VoidPtrPtrTy), - llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())}; + llvm::Value *Args[] = {RTLoc, + getThreadID(CGF, Loc), + IfCondVal, + NumThreadsVal, + llvm::ConstantInt::get(CGF.Int32Ty, -1), + FnPtr, + ID, + AggregatePtr}; CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), OMPRTL___kmpc_parallel_51), Args); + + if (AllocSize) { + assert(GlobalPtr && "Expected non-null global pointer value"); + // Pop global memory used for argument allocation. + CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), + OMPRTL___kmpc_free_shared), + {GlobalPtr, llvm::ConstantInt::get(CGM.SizeTy, AllocSize)}); + } }; RegionCodeGenTy RCG(ParallelGen); @@ -3528,7 +3559,6 @@ D.getBeginLoc(), D.getBeginLoc()); const auto *RD = CS.getCapturedRecordDecl(); - auto CurField = RD->field_begin(); Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, /*Name=*/".zero.addr"); @@ -3540,70 +3570,44 @@ Args.emplace_back(ZeroAddr.getPointer()); CGBuilderTy &Bld = CGF.Builder; - auto CI = CS.capture_begin(); // Use global memory for data sharing. // Handle passing of global args to workers. Address GlobalArgs = - CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy, "global_args"); + CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrTy, "global_args"); llvm::Value *GlobalArgsPtr = GlobalArgs.getPointer(); llvm::Value *DataSharingArgs[] = {GlobalArgsPtr}; - CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_get_shared_variables), - DataSharingArgs); + CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_get_shared_variables_aggregate), + DataSharingArgs); // Retrieve the shared variables from the list of references returned // by the runtime. Pass the variables to the outlined function. - Address SharedArgListAddress = Address::invalid(); - if (CS.capture_size() > 0 || - isOpenMPLoopBoundSharingDirective(D.getDirectiveKind())) { - SharedArgListAddress = CGF.EmitLoadOfPointer( + Address SharedArgAggregateAddress = Address::invalid(); + if (CS.capture_size() > 0) { + SharedArgAggregateAddress = CGF.EmitLoadOfPointer( GlobalArgs, CGF.getContext() - .getPointerType(CGF.getContext().VoidPtrTy) + .getPointerType(CGF.getContext().VoidTy) .castAs()); - } - unsigned Idx = 0; - if (isOpenMPLoopBoundSharingDirective(D.getDirectiveKind())) { - Address Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx); + // Load the outlined arg aggregate struct. + ASTContext &CGFContext = CGF.getContext(); + QualType RecordPointerTy = + CGFContext.getPointerType(CGFContext.getRecordType(RD)); Address TypedAddress = Bld.CreatePointerBitCastOrAddrSpaceCast( - Src, CGF.SizeTy->getPointerTo(), CGF.SizeTy); - llvm::Value *LB = CGF.EmitLoadOfScalar( - TypedAddress, - /*Volatile=*/false, - CGF.getContext().getPointerType(CGF.getContext().getSizeType()), - cast(D).getLowerBoundVariable()->getExprLoc()); - Args.emplace_back(LB); - ++Idx; - Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx); - TypedAddress = Bld.CreatePointerBitCastOrAddrSpaceCast( - Src, CGF.SizeTy->getPointerTo(), CGF.SizeTy); - llvm::Value *UB = CGF.EmitLoadOfScalar( - TypedAddress, - /*Volatile=*/false, - CGF.getContext().getPointerType(CGF.getContext().getSizeType()), - cast(D).getUpperBoundVariable()->getExprLoc()); - Args.emplace_back(UB); - ++Idx; - } - if (CS.capture_size() > 0) { + SharedArgAggregateAddress, + CGF.ConvertTypeForMem(RecordPointerTy)->getPointerTo(), + CGF.ConvertTypeForMem(RecordPointerTy)); + llvm::Value *Arg = TypedAddress.getPointer(); + Args.emplace_back(Arg); + } else { + // If there are no captured arguments, use nullptr. ASTContext &CGFContext = CGF.getContext(); - for (unsigned I = 0, E = CS.capture_size(); I < E; ++I, ++CI, ++CurField) { - QualType ElemTy = CurField->getType(); - Address Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, I + Idx); - Address TypedAddress = Bld.CreatePointerBitCastOrAddrSpaceCast( - Src, CGF.ConvertTypeForMem(CGFContext.getPointerType(ElemTy)), - CGF.ConvertTypeForMem(ElemTy)); - llvm::Value *Arg = CGF.EmitLoadOfScalar(TypedAddress, - /*Volatile=*/false, - CGFContext.getPointerType(ElemTy), - CI->getLocation()); - if (CI->capturesVariableByCopy() && - !CI->getCapturedVar()->getType()->isAnyPointerType()) { - Arg = castValueToType(CGF, Arg, ElemTy, CGFContext.getUIntPtrType(), - CI->getLocation()); - } - Args.emplace_back(Arg); - } + QualType RecordPointerTy = + CGFContext.getPointerType(CGFContext.getRecordType(RD)); + llvm::Value *Arg = + llvm::Constant::getNullValue(CGF.ConvertTypeForMem(RecordPointerTy)); + Args.emplace_back(Arg); } emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedParallelFn, Args); diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -90,6 +90,19 @@ auto *VD = C.getCapturedVar(); assert(VD == VD->getCanonicalDecl() && "Canonical decl must be captured."); + // Skip implicit captures for combined distribute loop bounds, + // those will be handled by later codegen. + if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) { + const auto *LoopDirective = cast(&S); + VarDecl *PrevLB = cast( + cast(LoopDirective->getPrevLowerBoundVariable()) + ->getDecl()); + VarDecl *PrevUB = cast( + cast(LoopDirective->getPrevUpperBoundVariable()) + ->getDecl()); + if (VD == PrevLB || VD == PrevUB) + continue; + } DeclRefExpr DRE( CGF.getContext(), const_cast(VD), isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo && @@ -265,6 +278,19 @@ continue; assert(VD == VD->getCanonicalDecl() && "Canonical decl must be captured."); + // Skip implicit captures for combined distribute loop bounds, + // those will be handled by later codegen. + if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) { + const auto *LoopDirective = cast(&S); + VarDecl *PrevLB = cast( + cast(LoopDirective->getPrevLowerBoundVariable()) + ->getDecl()); + VarDecl *PrevUB = cast( + cast(LoopDirective->getPrevUpperBoundVariable()) + ->getDecl()); + if (VD == PrevLB || VD == PrevUB) + continue; + } DeclRefExpr DRE(CGF.getContext(), const_cast(VD), isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo && @@ -322,6 +348,34 @@ return CGM.getSize(SizeInChars); } +void CodeGenFunction::GenerateOpenMPCapturedVarsAggregate( + const CapturedStmt &S, SmallVectorImpl &CapturedVars, + SmallVectorImpl &CapturedVarsElemTypes) { + const RecordDecl *RD = S.getCapturedRecordDecl(); + QualType RecordTy = getContext().getRecordType(RD); + // Create the aggregate argument struct for the outlined function. + LValue AggLV = MakeAddrLValue( + CreateMemTemp(RecordTy, "omp.outlined.arg.agg."), RecordTy); + + // Initialize the aggregate with captured values. + auto CurField = RD->field_begin(); + for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(), + E = S.capture_init_end(); + I != E; ++I, ++CurField) { + LValue LV = EmitLValueForFieldInitialization(AggLV, *CurField); + // Initialize for VLA. + if (CurField->hasCapturedVLAType()) { + EmitLambdaVLACapture(CurField->getCapturedVLAType(), LV); + } else + // Initialize for capturesThis, capturesVariableByCopy, + // capturesVariable + EmitInitializerForField(*CurField, LV, *I); + } + + CapturedVars.push_back(AggLV.getPointer(*this)); + CapturedVarsElemTypes.push_back(ConvertTypeForMem(AggLV.getType())); +} + void CodeGenFunction::GenerateOpenMPCapturedVars( const CapturedStmt &S, SmallVectorImpl &CapturedVars) { const RecordDecl *RD = S.getCapturedRecordDecl(); @@ -421,6 +475,103 @@ }; } // namespace +static llvm::Function *emitOutlinedFunctionPrologueAggregate( + CodeGenFunction &CGF, FunctionArgList &Args, + llvm::MapVector> + &LocalAddrs, + llvm::DenseMap> + &VLASizes, + llvm::Value *&CXXThisValue, const CapturedStmt &CS, SourceLocation Loc, + StringRef FunctionName) { + const CapturedDecl *CD = CS.getCapturedDecl(); + const RecordDecl *RD = CS.getCapturedRecordDecl(); + assert(CD->hasBody() && "missing CapturedDecl body"); + + CXXThisValue = nullptr; + // Build the argument list. + CodeGenModule &CGM = CGF.CGM; + ASTContext &Ctx = CGM.getContext(); + Args.append(CD->param_begin(), CD->param_end()); + + // Create the function declaration. + const CGFunctionInfo &FuncInfo = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args); + llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); + + auto *F = + llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage, + FunctionName, &CGM.getModule()); + CGM.SetInternalFunctionAttributes(CD, F, FuncInfo); + if (CD->isNothrow()) + F->setDoesNotThrow(); + F->setDoesNotRecurse(); + + // Generate the function. + CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, Loc, Loc); + Address ContextAddr = CGF.GetAddrOfLocalVar(CD->getContextParam()); + llvm::Value *ContextV = CGF.Builder.CreateLoad(ContextAddr); + LValue ContextLV = CGF.MakeNaturalAlignAddrLValue( + ContextV, CGM.getContext().getTagDeclType(RD)); + const auto *I = CS.captures().begin(); + for (const FieldDecl *FD : RD->fields()) { + LValue FieldLV = CGF.EmitLValueForFieldInitialization(ContextLV, FD); + // Do not map arguments if we emit function with non-original types. + Address LocalAddr = FieldLV.getAddress(CGF); + // If we are capturing a pointer by copy we don't need to do anything, just + // use the value that we get from the arguments. + if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) { + const VarDecl *CurVD = I->getCapturedVar(); + LocalAddrs.insert({FD, {CurVD, LocalAddr}}); + ++I; + continue; + } + + LValue ArgLVal = + CGF.MakeAddrLValue(LocalAddr, FD->getType(), AlignmentSource::Decl); + if (FD->hasCapturedVLAType()) { + llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation()); + const VariableArrayType *VAT = FD->getCapturedVLAType(); + VLASizes.try_emplace(FD, VAT->getSizeExpr(), ExprArg); + } else if (I->capturesVariable()) { + const VarDecl *Var = I->getCapturedVar(); + QualType VarTy = Var->getType(); + Address ArgAddr = ArgLVal.getAddress(CGF); + if (ArgLVal.getType()->isLValueReferenceType()) { + ArgAddr = CGF.EmitLoadOfReference(ArgLVal); + } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) { + assert(ArgLVal.getType()->isPointerType()); + ArgAddr = CGF.EmitLoadOfPointer( + ArgAddr, ArgLVal.getType()->castAs()); + } + LocalAddrs.insert( + {FD, + {Var, Address(ArgAddr.getPointer(), ArgAddr.getElementType(), + Ctx.getDeclAlign(Var))}}); + } else if (I->capturesVariableByCopy()) { + assert(!FD->getType()->isAnyPointerType() && + "Not expecting a captured pointer."); + const VarDecl *Var = I->getCapturedVar(); + Address CopyAddr = CGF.CreateMemTemp(FD->getType(), Ctx.getDeclAlign(FD), + Var->getName()); + LValue CopyLVal = + CGF.MakeAddrLValue(CopyAddr, FD->getType(), AlignmentSource::Decl); + + RValue ArgRVal = CGF.EmitLoadOfLValue(ArgLVal, I->getLocation()); + CGF.EmitStoreThroughLValue(ArgRVal, CopyLVal); + + LocalAddrs.insert({FD, {Var, CopyAddr}}); + } else { + // If 'this' is captured, load it into CXXThisValue. + assert(I->capturesThis()); + CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation()); + LocalAddrs.insert({FD, {nullptr, ArgLVal.getAddress(CGF)}}); + } + ++I; + } + + return F; +} + static llvm::Function *emitOutlinedFunctionPrologue( CodeGenFunction &CGF, FunctionArgList &Args, llvm::MapVector> @@ -605,6 +756,38 @@ return F; } +llvm::Function *CodeGenFunction::GenerateOpenMPCapturedStmtFunctionAggregate( + const CapturedStmt &S, SourceLocation Loc) { + assert( + CapturedStmtInfo && + "CapturedStmtInfo should be set when generating the captured function"); + const CapturedDecl *CD = S.getCapturedDecl(); + // Build the argument list. + FunctionArgList Args; + llvm::MapVector> LocalAddrs; + llvm::DenseMap> VLASizes; + StringRef FunctionName = CapturedStmtInfo->getHelperName(); + llvm::Function *F = emitOutlinedFunctionPrologueAggregate( + *this, Args, LocalAddrs, VLASizes, CXXThisValue, S, Loc, FunctionName); + CodeGenFunction::OMPPrivateScope LocalScope(*this); + for (const auto &LocalAddrPair : LocalAddrs) { + if (LocalAddrPair.second.first) { + auto retAddrPair = [&LocalAddrPair]() { + return LocalAddrPair.second.second; + }; + LocalScope.addPrivate(LocalAddrPair.second.first, retAddrPair()); + } + } + (void)LocalScope.Privatize(); + for (const auto &VLASizePair : VLASizes) + VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second; + PGO.assignRegionCounters(GlobalDecl(CD), F); + CapturedStmtInfo->EmitBody(*this, CD->getBody()); + (void)LocalScope.ForceCleanup(); + FinishFunction(CD->getBodyRBrace()); + return F; +} + llvm::Function * CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, SourceLocation Loc) { @@ -1473,9 +1656,8 @@ /// Codegen lambda for appending distribute lower and upper bounds to outlined /// parallel function. This is necessary for combined constructs such as /// 'distribute parallel for' -typedef llvm::function_ref &)> +typedef llvm::function_ref CodeGenBoundParametersTy; } // anonymous namespace @@ -1568,14 +1750,18 @@ OMPParallelScope Scope(CGF, S); llvm::SmallVector CapturedVars; + llvm::SmallVector CapturedVarsElemTypes; + // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk // lower and upper bounds with the pragma 'for' chunking mechanism. // The following lambda takes care of appending the lower and upper bound // parameters when necessary - CodeGenBoundParameters(CGF, S, CapturedVars); - CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); - CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn, - CapturedVars, IfCond, NumThreads); + CodeGenBoundParameters(CGF, S, *CS); + CGF.GenerateOpenMPCapturedVarsAggregate(*CS, CapturedVars, + CapturedVarsElemTypes); + CGF.CGM.getOpenMPRuntime().emitParallelCall( + CGF, S.getBeginLoc(), OutlinedFn, CapturedVars, CapturedVarsElemTypes, + IfCond, NumThreads); } static bool isAllocatableDecl(const VarDecl *VD) { @@ -1591,7 +1777,7 @@ static void emitEmptyBoundParameters(CodeGenFunction &, const OMPExecutableDirective &, - llvm::SmallVectorImpl &) {} + const CapturedStmt &) {} Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable( CodeGenFunction &CGF, const VarDecl *VD) { @@ -3082,21 +3268,30 @@ static void emitDistributeParallelForDistributeInnerBoundParams( CodeGenFunction &CGF, const OMPExecutableDirective &S, - llvm::SmallVectorImpl &CapturedVars) { + const CapturedStmt &CS) { const auto &Dir = cast(S); + + // The first captured variable of the captured statement corresponds + // to inner lower bound. + VarDecl *PrevLBCapDecl = CS.captures().begin()->getCapturedVar(); + // The second captured variable corresponds to the inner upper bound. + VarDecl *PrevUBCapDecl = std::next(CS.captures().begin())->getCapturedVar(); + LValue LB = CGF.EmitLValue(cast(Dir.getCombinedLowerBoundVariable())); llvm::Value *LBCast = CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(LB.getAddress(CGF)), CGF.SizeTy, /*isSigned=*/false); - CapturedVars.push_back(LBCast); + CGF.EmitStoreOfScalar(LBCast, CGF.GetAddrOfLocalVar(PrevLBCapDecl), + /*Volatile=*/false, PrevLBCapDecl->getType()); + LValue UB = CGF.EmitLValue(cast(Dir.getCombinedUpperBoundVariable())); - llvm::Value *UBCast = CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(UB.getAddress(CGF)), CGF.SizeTy, /*isSigned=*/false); - CapturedVars.push_back(UBCast); + CGF.EmitStoreOfScalar(UBCast, CGF.GetAddrOfLocalVar(PrevUBCapDecl), + /*Volatile=*/false, PrevUBCapDecl->getType()); } static void @@ -5527,6 +5722,14 @@ LValue IL = EmitOMPHelperVar(*this, cast(S.getIsLastIterVariable())); + // Emit previous upper, lower bound captured variables, if applicable. + if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) { + EmitOMPHelperVar(*this, + cast(S.getPrevLowerBoundVariable())); + EmitOMPHelperVar(*this, + cast(S.getPrevUpperBoundVariable())); + } + OMPPrivateScope LoopScope(*this); if (EmitOMPFirstprivateClause(S, LoopScope)) { // Emit implicit barrier to synchronize threads and avoid data races @@ -6655,7 +6858,10 @@ OMPTeamsScope Scope(CGF, S); llvm::SmallVector CapturedVars; - CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); + llvm::SmallVector CapturedVarsElemTypes; + + CGF.GenerateOpenMPCapturedVarsAggregate(*CS, CapturedVars, + CapturedVarsElemTypes); CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn, CapturedVars); } diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3336,8 +3336,14 @@ llvm::Function *EmitCapturedStmt(const CapturedStmt &S, CapturedRegionKind K); llvm::Function *GenerateCapturedStmtFunction(const CapturedStmt &S); Address GenerateCapturedStmtArgument(const CapturedStmt &S); + llvm::Function * + GenerateOpenMPCapturedStmtFunctionAggregate(const CapturedStmt &S, + SourceLocation Loc); llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, SourceLocation Loc); + void GenerateOpenMPCapturedVarsAggregate( + const CapturedStmt &S, SmallVectorImpl &CapturedVars, + SmallVectorImpl &CapturedVarsElemTypes); void GenerateOpenMPCapturedVars(const CapturedStmt &S, SmallVectorImpl &CapturedVars); void emitOMPSimpleStore(LValue LVal, RValue RVal, QualType RValTy, diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -4384,12 +4384,22 @@ Sema::CapturedParamNameType Params[] = { std::make_pair(".global_tid.", KmpInt32PtrTy), std::make_pair(".bound_tid.", KmpInt32PtrTy), - std::make_pair(".previous.lb.", Context.getSizeType().withConst()), - std::make_pair(".previous.ub.", Context.getSizeType().withConst()), std::make_pair(StringRef(), QualType()) // __context with shared vars }; + + VarDecl *PrevLBDecl = + buildVarDecl(*this, SourceLocation(), Context.getSizeType().withConst(), + ".captured.omp.previous.lb"); + VarDecl *PrevUBDecl = + buildVarDecl(*this, SourceLocation(), Context.getSizeType().withConst(), + ".captured.omp.previous.ub"); + ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, Params); + + MarkVariableReferenced(SourceLocation(), PrevLBDecl); + MarkVariableReferenced(SourceLocation(), PrevUBDecl); + break; } case OMPD_target_teams_distribute_parallel_for: @@ -4440,14 +4450,24 @@ Sema::CapturedParamNameType ParamsParallel[] = { std::make_pair(".global_tid.", KmpInt32PtrTy), std::make_pair(".bound_tid.", KmpInt32PtrTy), - std::make_pair(".previous.lb.", Context.getSizeType().withConst()), - std::make_pair(".previous.ub.", Context.getSizeType().withConst()), std::make_pair(StringRef(), QualType()) // __context with shared vars }; + + VarDecl *PrevLBDecl = + buildVarDecl(*this, SourceLocation(), Context.getSizeType().withConst(), + ".captured.omp.previous.lb"); + VarDecl *PrevUBDecl = + buildVarDecl(*this, SourceLocation(), Context.getSizeType().withConst(), + ".captured.omp.previous.ub"); + // Start a captured region for 'teams' or 'parallel'. Both regions have // the same implicit parameters. ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, ParamsParallel, /*OpenMPCaptureLevel=*/3); + + MarkVariableReferenced(SourceLocation(), PrevLBDecl); + MarkVariableReferenced(SourceLocation(), PrevUBDecl); + break; } @@ -4485,14 +4505,24 @@ Sema::CapturedParamNameType ParamsParallel[] = { std::make_pair(".global_tid.", KmpInt32PtrTy), std::make_pair(".bound_tid.", KmpInt32PtrTy), - std::make_pair(".previous.lb.", Context.getSizeType().withConst()), - std::make_pair(".previous.ub.", Context.getSizeType().withConst()), std::make_pair(StringRef(), QualType()) // __context with shared vars }; + + VarDecl *PrevLBDecl = + buildVarDecl(*this, SourceLocation(), Context.getSizeType().withConst(), + ".captured.omp.previous.lb"); + VarDecl *PrevUBDecl = + buildVarDecl(*this, SourceLocation(), Context.getSizeType().withConst(), + ".captured.omp.previous.ub"); + // Start a captured region for 'teams' or 'parallel'. Both regions have // the same implicit parameters. ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, ParamsParallel, /*OpenMPCaptureLevel=*/1); + + MarkVariableReferenced(SourceLocation(), PrevLBDecl); + MarkVariableReferenced(SourceLocation(), PrevUBDecl); + break; } case OMPD_target_update: @@ -9740,23 +9770,23 @@ CombEUB = SemaRef.ActOnFinishFullExpr(CombEUB.get(), /*DiscardedValue*/ false); - const CapturedDecl *CD = cast(AStmt)->getCapturedDecl(); - // We expect to have at least 2 more parameters than the 'parallel' - // directive does - the lower and upper bounds of the previous schedule. - assert(CD->getNumParams() >= 4 && - "Unexpected number of parameters in loop combined directive"); - - // Set the proper type for the bounds given what we learned from the - // enclosed loops. - ImplicitParamDecl *PrevLBDecl = CD->getParam(/*PrevLB=*/2); - ImplicitParamDecl *PrevUBDecl = CD->getParam(/*PrevUB=*/3); - - // Previous lower and upper bounds are obtained from the region - // parameters. - PrevLB = - buildDeclRefExpr(SemaRef, PrevLBDecl, PrevLBDecl->getType(), InitLoc); - PrevUB = - buildDeclRefExpr(SemaRef, PrevUBDecl, PrevUBDecl->getType(), InitLoc); + const CapturedStmt *CS = cast(AStmt); + + // Refer to captured variables from the associated captured statement of + // the combined directive. First captured variable is the previous lower + // bound, second is the previous upper bound. + VarDecl *PrevLBCapDecl = CS->captures().begin()->getCapturedVar(); + assert(PrevLBCapDecl && + "Expected captured var for previous LB in combined directive"); + VarDecl *PrevUBCapDecl = + std::next(CS->captures().begin())->getCapturedVar(); + assert(PrevLBCapDecl && + "Expected captured var for previous UB in combined directive"); + + PrevLB = buildDeclRefExpr(SemaRef, PrevLBCapDecl, + PrevLBCapDecl->getType(), InitLoc); + PrevUB = buildDeclRefExpr(SemaRef, PrevUBCapDecl, + PrevUBCapDecl->getType(), InitLoc); } } diff --git a/clang/test/AST/ast-dump-openmp-distribute-parallel-for.c b/clang/test/AST/ast-dump-openmp-distribute-parallel-for.c --- a/clang/test/AST/ast-dump-openmp-distribute-parallel-for.c +++ b/clang/test/AST/ast-dump-openmp-distribute-parallel-for.c @@ -35,231 +35,230 @@ ; } -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-distribute-parallel-for.c:3:1, line:7:1> line:3:6 test_one 'void (int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK: |-FunctionDecl [[ADDR_0:0x[a-z0-9]*]] line:3:6 test_one 'void (int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_1:0x[a-z0-9]*]] col:19 used x 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_2:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPDistributeParallelForDirective [[ADDR_3:0x[a-z0-9]*]] +// CHECK-NEXT: | `-CapturedStmt [[ADDR_4:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_5:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_6:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_7:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_8:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_9:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:9:6 test_two 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:26 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_10:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_11:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_12:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_8]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_13:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_14:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_15:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_16:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_8]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_17:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_18:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_19:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_20:0x[a-z0-9]*]] col:1 implicit __context 'struct (unnamed at clang/test/AST/ast-dump-openmp-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | `-VarDecl [[ADDR_8]] col:12 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_9]] 'int' 0 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_21:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_22:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_23:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_24:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_25:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_26:0x[a-z0-9]*]] line:9:6 test_two 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_27:0x[a-z0-9]*]] col:19 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_28:0x[a-z0-9]*]] col:26 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_29:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPDistributeParallelForDirective [[ADDR_30:0x[a-z0-9]*]] +// CHECK-NEXT: | `-CapturedStmt [[ADDR_31:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_32:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_33:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_34:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_35:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_36:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_37:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_38:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_39:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_35]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_40:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_41:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_27]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_42:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_43:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_35]] 'i' 'int' +// CHECK-NEXT: | | | `-ForStmt [[ADDR_44:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_45:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_46:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_47:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:16:6 test_three 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:28 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_48:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_49:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_50:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_46]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_51:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_52:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_28]] 'y' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_53:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_54:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_46]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_55:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_56:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_57:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_58:0x[a-z0-9]*]] col:1 implicit __context 'struct (unnamed at clang/test/AST/ast-dump-openmp-distribute-parallel-for.c:10:1) *const restrict' +// CHECK-NEXT: | | |-VarDecl [[ADDR_35]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_36]] 'int' 0 +// CHECK-NEXT: | | `-VarDecl [[ADDR_46]] col:14 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_47]] 'int' 0 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_59:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_60:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_61:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_62:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_63:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_27]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_64:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_28]] 'y' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_65:0x[a-z0-9]*]] line:16:6 test_three 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_66:0x[a-z0-9]*]] col:21 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_67:0x[a-z0-9]*]] col:28 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_68:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPDistributeParallelForDirective [[ADDR_69:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPCollapseClause [[ADDR_70:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-ConstantExpr [[ADDR_71:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | |-value: Int 1 +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_72:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | `-CapturedStmt [[ADDR_73:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_74:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_75:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_76:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_77:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_78:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_79:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_80:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_81:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_77]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_82:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_83:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_66]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_84:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_85:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_77]] 'i' 'int' +// CHECK-NEXT: | | | `-ForStmt [[ADDR_86:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_87:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_88:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_89:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:23:6 test_four 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 2 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_90:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_91:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_92:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_88]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_93:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_94:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_67]] 'y' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_95:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_96:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_88]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_97:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_98:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_99:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_100:0x[a-z0-9]*]] col:1 implicit __context 'struct (unnamed at clang/test/AST/ast-dump-openmp-distribute-parallel-for.c:17:1) *const restrict' +// CHECK-NEXT: | | |-VarDecl [[ADDR_77]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_78]] 'int' 0 +// CHECK-NEXT: | | `-VarDecl [[ADDR_88]] col:14 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_89]] 'int' 0 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_101:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_102:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_103:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_104:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_105:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_66]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_106:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_67]] 'y' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_107:0x[a-z0-9]*]] line:23:6 test_four 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_108:0x[a-z0-9]*]] col:20 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_109:0x[a-z0-9]*]] col:27 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_110:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPDistributeParallelForDirective [[ADDR_111:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPCollapseClause [[ADDR_112:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-ConstantExpr [[ADDR_113:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | |-value: Int 2 +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_114:0x[a-z0-9]*]] 'int' 2 +// CHECK-NEXT: | `-CapturedStmt [[ADDR_115:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_116:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_117:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_118:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_119:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_120:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_121:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_122:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_123:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_119]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_124:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_125:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_108]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_126:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_127:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_119]] 'i' 'int' +// CHECK-NEXT: | | | `-ForStmt [[ADDR_128:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_129:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_130:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_131:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-FunctionDecl {{.*}} line:30:6 test_five 'void (int, int, int)' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:34 used z 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPDistributeParallelForDirective {{.*}} -// CHECK-NEXT: |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | |-value: Int 2 -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_132:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_133:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_134:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_130]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_135:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_136:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_109]] 'y' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_137:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_138:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_130]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_139:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_140:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_141:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_142:0x[a-z0-9]*]] col:1 implicit __context 'struct (unnamed at clang/test/AST/ast-dump-openmp-distribute-parallel-for.c:24:1) *const restrict' +// CHECK-NEXT: | | |-VarDecl [[ADDR_119]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_120]] 'int' 0 +// CHECK-NEXT: | | `-VarDecl [[ADDR_130]] col:14 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_131]] 'int' 0 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_143:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_144:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_145:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_146:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_147:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_108]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_148:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_109]] 'y' 'int' +// CHECK-NEXT: `-FunctionDecl [[ADDR_149:0x[a-z0-9]*]] line:30:6 test_five 'void (int, int, int)' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_150:0x[a-z0-9]*]] col:20 used x 'int' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_151:0x[a-z0-9]*]] col:27 used y 'int' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_152:0x[a-z0-9]*]] col:34 used z 'int' +// CHECK-NEXT: `-CompoundStmt [[ADDR_153:0x[a-z0-9]*]] +// CHECK-NEXT: `-OMPDistributeParallelForDirective [[ADDR_154:0x[a-z0-9]*]] +// CHECK-NEXT: |-OMPCollapseClause [[ADDR_155:0x[a-z0-9]*]] +// CHECK-NEXT: | `-ConstantExpr [[ADDR_156:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | |-value: Int 2 +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_157:0x[a-z0-9]*]] 'int' 2 +// CHECK-NEXT: `-CapturedStmt [[ADDR_158:0x[a-z0-9]*]] +// CHECK-NEXT: |-CapturedDecl [[ADDR_159:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | |-ForStmt [[ADDR_160:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-DeclStmt [[ADDR_161:0x[a-z0-9]*]] +// CHECK-NEXT: | | | `-VarDecl [[ADDR_162:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_163:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_164:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_165:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_166:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_162]] 'i' 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_167:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_168:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_150]] 'x' 'int' +// CHECK-NEXT: | | |-UnaryOperator [[ADDR_169:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_170:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_162]] 'i' 'int' +// CHECK-NEXT: | | `-ForStmt [[ADDR_171:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-DeclStmt [[ADDR_172:0x[a-z0-9]*]] +// CHECK-NEXT: | | | `-VarDecl [[ADDR_173:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_174:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_175:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_176:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_177:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_173]] 'i' 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_178:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_179:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_151]] 'y' 'int' +// CHECK-NEXT: | | |-UnaryOperator [[ADDR_180:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_181:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_173]] 'i' 'int' +// CHECK-NEXT: | | `-ForStmt [[ADDR_182:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-DeclStmt [[ADDR_183:0x[a-z0-9]*]] +// CHECK-NEXT: | | | `-VarDecl [[ADDR_184:0x[a-z0-9]*]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_185:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-NullStmt {{.*}} -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_186:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_187:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_188:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_184]] 'i' 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_189:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_190:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_152]] 'z' 'int' +// CHECK-NEXT: | | |-UnaryOperator [[ADDR_191:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_192:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_184]] 'i' 'int' +// CHECK-NEXT: | | `-NullStmt [[ADDR_193:0x[a-z0-9]*]] +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_194:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_195:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_196:0x[a-z0-9]*]] col:1 implicit __context 'struct (unnamed at clang/test/AST/ast-dump-openmp-distribute-parallel-for.c:31:1) *const restrict' +// CHECK-NEXT: | |-VarDecl [[ADDR_162]] col:12 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_163]] 'int' 0 +// CHECK-NEXT: | |-VarDecl [[ADDR_173]] col:14 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_174]] 'int' 0 +// CHECK-NEXT: | `-VarDecl [[ADDR_184]] col:16 used i 'int' cinit +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_185]] 'int' 0 +// CHECK-NEXT: |-DeclRefExpr [[ADDR_197:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_198:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: |-DeclRefExpr [[ADDR_199:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_200:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: |-DeclRefExpr [[ADDR_201:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_150]] 'x' 'int' +// CHECK-NEXT: |-DeclRefExpr [[ADDR_202:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_151]] 'y' 'int' +// CHECK-NEXT: `-DeclRefExpr [[ADDR_203:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_152]] 'z' 'int' diff --git a/clang/test/OpenMP/cancel_codegen.cpp b/clang/test/OpenMP/cancel_codegen.cpp --- a/clang/test/OpenMP/cancel_codegen.cpp +++ b/clang/test/OpenMP/cancel_codegen.cpp @@ -84,6 +84,7 @@ // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -104,39 +105,46 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I24:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_36:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_37:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK1-NEXT: [[R:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_38:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i8*** [[ARGV_ADDR]], i32* [[ARGC_ADDR]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i8*** [[ARGV_ADDR]], i8**** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[ARGC_ADDR]], i32** [[TMP2]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0 -// CHECK1-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 0 -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 0 +// CHECK1-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 0 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP7]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 3) -// CHECK1-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK1-NEXT: br i1 [[TMP9]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 3) +// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: // CHECK1-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK1: .cancel.continue: @@ -144,8 +152,8 @@ // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: @@ -158,28 +166,28 @@ // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_3]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_4]], align 4 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_4]], i32* [[DOTOMP_SECTIONS_LB_1]], i32* [[DOTOMP_SECTIONS_UB_2]], i32* [[DOTOMP_SECTIONS_ST_3]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_2]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp slt i32 [[TMP11]], 1 -// CHECK1-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 1 -// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_SECTIONS_UB_2]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_SECTIONS_IV_5]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_2]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = icmp slt i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 1 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_SECTIONS_UB_2]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_SECTIONS_IV_5]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND6:%.*]] // CHECK1: omp.inner.for.cond6: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_5]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_2]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_5]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_2]], align 4 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY8:%.*]], label [[OMP_INNER_FOR_END18:%.*]] // CHECK1: omp.inner.for.body8: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_5]], align 4 -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_SECTIONS_EXIT15:%.*]] [ +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_5]], align 4 +// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_SECTIONS_EXIT15:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE9:%.*]] // CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE12:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case9: -// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 3) -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTCANCEL_EXIT10:%.*]], label [[DOTCANCEL_CONTINUE11:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 3) +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTCANCEL_EXIT10:%.*]], label [[DOTCANCEL_CONTINUE11:%.*]] // CHECK1: .cancel.exit10: // CHECK1-NEXT: br label [[CANCEL_EXIT19:%.*]] // CHECK1: cancel.exit: @@ -188,9 +196,9 @@ // CHECK1: .cancel.continue11: // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT15]] // CHECK1: .omp.sections.case12: -// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 3) -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTCANCEL_EXIT13:%.*]], label [[DOTCANCEL_CONTINUE14:%.*]] +// CHECK1-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 3) +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTCANCEL_EXIT13:%.*]], label [[DOTCANCEL_CONTINUE14:%.*]] // CHECK1: .cancel.exit13: // CHECK1-NEXT: br label [[CANCEL_EXIT19]] // CHECK1: .cancel.continue14: @@ -198,8 +206,8 @@ // CHECK1: .omp.sections.exit15: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] // CHECK1: omp.inner.for.inc16: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_5]], align 4 -// CHECK1-NEXT: [[INC17:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_5]], align 4 +// CHECK1-NEXT: [[INC17:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK1-NEXT: store i32 [[INC17]], i32* [[DOTOMP_SECTIONS_IV_5]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND6]] // CHECK1: omp.inner.for.end18: @@ -207,57 +215,57 @@ // CHECK1-NEXT: br label [[CANCEL_CONT20:%.*]] // CHECK1: cancel.cont20: // CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB5]], i32 [[TMP0]]) -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP26]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB22:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB22]], i32* [[DOTCAPTURE_EXPR_21]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP23:%.*]] = icmp slt i32 0, [[TMP25]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP23:%.*]] = icmp slt i32 0, [[TMP27]] // CHECK1-NEXT: br i1 [[CMP23]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_21]], align 4 -// CHECK1-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_21]], align 4 +// CHECK1-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB6:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_21]], align 4 -// CHECK1-NEXT: [[CMP25:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_21]], align 4 +// CHECK1-NEXT: [[CMP25:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] // CHECK1-NEXT: br i1 [[CMP25]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_21]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_21]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE]] ], [ [[TMP30]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE]] ], [ [[TMP32]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND26:%.*]] // CHECK1: omp.inner.for.cond26: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP27:%.*]] = icmp sle i32 [[TMP32]], [[TMP33]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP27:%.*]] = icmp sle i32 [[TMP34]], [[TMP35]] // CHECK1-NEXT: br i1 [[CMP27]], label [[OMP_INNER_FOR_BODY28:%.*]], label [[OMP_INNER_FOR_END33:%.*]] // CHECK1: omp.inner.for.body28: -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP34]], 1 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP36]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I24]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = load float, float* @flag, align 4 -// CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP35]], 0.000000e+00 +// CHECK1-NEXT: [[TMP37:%.*]] = load float, float* @flag, align 4 +// CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP37]], 0.000000e+00 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP36:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 2) -// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 -// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTCANCEL_EXIT29:%.*]], label [[DOTCANCEL_CONTINUE30:%.*]] +// CHECK1-NEXT: [[TMP38:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 2) +// CHECK1-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK1-NEXT: br i1 [[TMP39]], label [[DOTCANCEL_EXIT29:%.*]], label [[DOTCANCEL_CONTINUE30:%.*]] // CHECK1: .cancel.exit29: // CHECK1-NEXT: br label [[CANCEL_EXIT34:%.*]] // CHECK1: cancel.exit19: @@ -272,8 +280,8 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC31:%.*]] // CHECK1: omp.inner.for.inc31: -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD32:%.*]] = add nsw i32 [[TMP38]], 1 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD32:%.*]] = add nsw i32 [[TMP40]], 1 // CHECK1-NEXT: store i32 [[ADD32]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND26]] // CHECK1: omp.inner.for.end33: @@ -288,73 +296,78 @@ // CHECK1-NEXT: br label [[CANCEL_CONT35]] // CHECK1: cancel.cont35: // CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]]) -// CHECK1-NEXT: [[TMP39:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP40:%.*]] = bitcast i8* [[TMP39]] to %struct.kmp_task_t_with_privates* -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP40]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP42:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP39]]) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[TMP41:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP42:%.*]] = bitcast i8* [[TMP41]] to %struct.kmp_task_t_with_privates* +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP42]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP44:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP41]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_36]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_37]]) // CHECK1-NEXT: store i32 0, i32* [[R]], align 4 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i32* [[R]]) -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK1-NEXT: ret i32 [[TMP43]] +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_38]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[ARGC_ADDR]], i32** [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_38]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[R]], i32** [[TMP46]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_38]]) +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 +// CHECK1-NEXT: ret i32 [[TMP47]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8*** noundef nonnull align 8 dereferenceable(8) [[ARGV:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca i8***, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i8*** [[ARGV]], i8**** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load float, float* @flag, align 4 -// CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8***, i8**** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load float, float* @flag, align 4 +// CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP5]], 0.000000e+00 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK1-NEXT: br i1 [[TMP6]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK1-NEXT: br i1 [[TMP9]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]]) // CHECK1-NEXT: br label [[RETURN:%.*]] // CHECK1: .cancel.continue: // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[TMP10]] to i8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8**, i8*** [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[TMP13]] to i8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i8**, i8*** [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP14]], i64 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP15]], i64 0 // CHECK1-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX1]], align 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP14]]) -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] // CHECK1: .cancel.exit2: // CHECK1-NEXT: br label [[RETURN]] // CHECK1: .cancel.continue3: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i8**, i8*** [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8*, i8** [[TMP18]], i64 0 -// CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[ARRAYIDX4]], align 8 -// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, i8* [[TMP19]], i64 0 -// CHECK1-NEXT: [[TMP20:%.*]] = load i8, i8* [[ARRAYIDX5]], align 1 -// CHECK1-NEXT: [[CONV6:%.*]] = sext i8 [[TMP20]] to i32 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV6]], [[TMP17]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i8**, i8*** [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8*, i8** [[TMP21]], i64 0 +// CHECK1-NEXT: [[TMP22:%.*]] = load i8*, i8** [[ARRAYIDX4]], align 8 +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, i8* [[TMP22]], i64 0 +// CHECK1-NEXT: [[TMP23:%.*]] = load i8, i8* [[ARRAYIDX5]], align 1 +// CHECK1-NEXT: [[CONV6:%.*]] = sext i8 [[TMP23]] to i32 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV6]], [[TMP20]] // CHECK1-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD]] to i8 // CHECK1-NEXT: store i8 [[CONV7]], i8* [[ARRAYIDX5]], align 1 // CHECK1-NEXT: br label [[RETURN]] @@ -370,7 +383,7 @@ // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[CLEANUP_DEST_SLOT_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8 @@ -382,7 +395,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) @@ -393,8 +406,8 @@ // CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 // CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 4) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 @@ -411,10 +424,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -422,34 +436,36 @@ // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 0 -// CHECK1-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 0 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 0 +// CHECK1-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 0 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 3) -// CHECK1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK1-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 3) +// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: // CHECK1-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK1: .cancel.continue: @@ -457,25 +473,26 @@ // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK1: cancel.cont: // CHECK1-NEXT: ret void // CHECK1: cancel.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[CANCEL_CONT]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -483,43 +500,45 @@ // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1 -// CHECK1-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 1 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 3) -// CHECK1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK1-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 3) +// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: // CHECK1-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK1: .cancel.continue: // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK1: .omp.sections.case1: -// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 3) -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 3) +// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] // CHECK1: .cancel.exit2: // CHECK1-NEXT: br label [[CANCEL_EXIT]] // CHECK1: .cancel.continue3: @@ -527,27 +546,26 @@ // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK1: cancel.cont: // CHECK1-NEXT: ret void // CHECK1: cancel.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[CANCEL_CONT]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[R:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[R_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -557,114 +575,116 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[R3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[R:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[R]], i32** [[R_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[R_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[R3]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB6]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: store i32 0, i32* [[R]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB6]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 2) -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]], i32 2) +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: // CHECK1-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK1: .cancel.continue: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[R3]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[R3]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[R]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[R]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i32* [[R3]] to i8* -// CHECK1-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB7:[0-9]+]], i32 [[TMP28]], i32 1, i64 8, i8* [[TMP29]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP30]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP27]]) +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP29:%.*]] = bitcast i32* [[R]] to i8* +// CHECK1-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB7:[0-9]+]], i32 [[TMP31]], i32 1, i64 8, i8* [[TMP32]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP33]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[R3]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB7]], i32 [[TMP28]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[R]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[TMP4]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB7]], i32 [[TMP31]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: cancel.exit: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP34]]) +// CHECK1-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP36]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB6]], i32 [[TMP37]]) // CHECK1-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[R3]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = atomicrmw add i32* [[TMP1]], i32 [[TMP35]] monotonic, align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[R]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = atomicrmw add i32* [[TMP4]], i32 [[TMP38]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -724,7 +744,10 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I36:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_49:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK3-NEXT: [[R:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_50:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK3-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK3-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 // CHECK3-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 @@ -941,12 +964,16 @@ // CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0 // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB14]]) // CHECK3-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], i8* [[TMP36]]) -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_49]]) // CHECK3-NEXT: store i32 0, i32* [[R]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i32* [[R]]) -// CHECK3-NEXT: [[TMP40:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK3-NEXT: ret i32 [[TMP40]] +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_50]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[ARGC_ADDR]], i32** [[TMP40]], align 8 +// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_50]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[R]], i32** [[TMP41]], align 8 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_50]]) +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 +// CHECK3-NEXT: ret i32 [[TMP42]] // // // CHECK3-LABEL: define {{[^@]+}}@main..omp_par @@ -1061,10 +1088,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK3-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -1072,34 +1100,36 @@ // CHECK3-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB17:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 0 -// CHECK3-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0 +// CHECK3-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 0 +// CHECK3-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: switch i32 [[TMP6]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: switch i32 [[TMP7]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK3-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.sections.case: // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK3-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK3-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] +// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) +// CHECK3-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 +// CHECK3-NEXT: br i1 [[TMP9]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] // CHECK3: .omp.sections.case.split: // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK3: .omp.sections.case.cncl: @@ -1107,8 +1137,8 @@ // CHECK3: .omp.sections.exit: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: @@ -1124,10 +1154,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK3-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -1135,44 +1166,46 @@ // CHECK3-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 1 -// CHECK3-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 1 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 1 +// CHECK3-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: switch i32 [[TMP6]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: switch i32 [[TMP7]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK3-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK3-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.sections.case: // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK3-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK3-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] +// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) +// CHECK3-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 +// CHECK3-NEXT: br i1 [[TMP9]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] // CHECK3: .omp.sections.case.split: // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK3: .omp.sections.case.cncl: // CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: .omp.sections.case2: // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3) -// CHECK3-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 -// CHECK3-NEXT: br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]] +// CHECK3-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3) +// CHECK3-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0 +// CHECK3-NEXT: br i1 [[TMP11]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]] // CHECK3: .omp.sections.case2.split: // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_CASE2_SECTION_AFTER:%.*]] // CHECK3: .omp.sections.case2.section.after: @@ -1182,8 +1215,8 @@ // CHECK3: .omp.sections.exit: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: @@ -1199,12 +1232,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[R:%.*]]) #[[ATTR5]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[R_ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1214,109 +1246,111 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[R3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[R:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: store i32* [[R]], i32** [[R_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[R_ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[R3]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[R]], align 4 // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB25:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) -// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], i32 2) -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB27:[0-9]+]]) +// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM6]], i32 2) +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK3: .cancel.exit: // CHECK3-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK3: .cancel.continue: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[R3]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP16]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[R3]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[R]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], [[TMP19]] +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[R]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP20:%.*]] = bitcast i32* [[R3]] to i8* -// CHECK3-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) -// CHECK3-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29:[0-9]+]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK3-NEXT: [[TMP23:%.*]] = bitcast i32* [[R]] to i8* +// CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK3-NEXT: [[TMP24:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM11]], i32 1, i64 8, i8* [[TMP24]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[R3]], align 4 -// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK3-NEXT: store i32 [[ADD13]], i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM12]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[R]], align 4 +// CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK3-NEXT: store i32 [[ADD12]], i32* [[TMP4]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM11]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: cancel.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM9:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB29]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM9]]) // CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[R3]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = atomicrmw add i32* [[TMP1]], i32 [[TMP25]] monotonic, align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[R]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = atomicrmw add i32* [[TMP4]], i32 [[TMP28]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: br label [[OMP_PRECOND_END]] diff --git a/clang/test/OpenMP/cancellation_point_codegen.cpp b/clang/test/OpenMP/cancellation_point_codegen.cpp --- a/clang/test/OpenMP/cancellation_point_codegen.cpp +++ b/clang/test/OpenMP/cancellation_point_codegen.cpp @@ -86,6 +86,7 @@ // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -106,45 +107,52 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I28:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 -// CHECK1-NEXT: [[AGG_CAPTURED42:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED42:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_43:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_44:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 1 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_45:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i8*** [[ARGV_ADDR]], i32* [[ARGC_ADDR]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i8*** [[ARGV_ADDR]], i8**** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[ARGC_ADDR]], i32** [[TMP2]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0 -// CHECK1-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 0 -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 0 +// CHECK1-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 0 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP7]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 3) -// CHECK1-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK1-NEXT: br i1 [[TMP9]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 3) +// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: // CHECK1-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK1: .cancel.continue: -// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 3) -// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT1:%.*]], label [[DOTCANCEL_CONTINUE2:%.*]] +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 3) +// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTCANCEL_EXIT1:%.*]], label [[DOTCANCEL_CONTINUE2:%.*]] // CHECK1: .cancel.exit1: // CHECK1-NEXT: br label [[CANCEL_EXIT]] // CHECK1: .cancel.continue2: @@ -152,8 +160,8 @@ // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: @@ -166,28 +174,28 @@ // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_5]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_6]], align 4 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_SECTIONS_IL_6]], i32* [[DOTOMP_SECTIONS_LB_3]], i32* [[DOTOMP_SECTIONS_UB_4]], i32* [[DOTOMP_SECTIONS_ST_5]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_4]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp slt i32 [[TMP13]], 1 -// CHECK1-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 1 -// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_SECTIONS_UB_4]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_3]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_SECTIONS_IV_7]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_4]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = icmp slt i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP15]], i32 1 +// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_SECTIONS_UB_4]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_3]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_SECTIONS_IV_7]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] // CHECK1: omp.inner.for.cond8: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_7]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_4]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_7]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_4]], align 4 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END22:%.*]] // CHECK1: omp.inner.for.body10: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_7]], align 4 -// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_SECTIONS_EXIT19:%.*]] [ +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_7]], align 4 +// CHECK1-NEXT: switch i32 [[TMP21]], label [[DOTOMP_SECTIONS_EXIT19:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE11:%.*]] // CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE14:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case11: -// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 3) -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTCANCEL_EXIT12:%.*]], label [[DOTCANCEL_CONTINUE13:%.*]] +// CHECK1-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 3) +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTCANCEL_EXIT12:%.*]], label [[DOTCANCEL_CONTINUE13:%.*]] // CHECK1: .cancel.exit12: // CHECK1-NEXT: br label [[CANCEL_EXIT23:%.*]] // CHECK1: cancel.exit: @@ -196,15 +204,15 @@ // CHECK1: .cancel.continue13: // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT19]] // CHECK1: .omp.sections.case14: -// CHECK1-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 3) -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTCANCEL_EXIT15:%.*]], label [[DOTCANCEL_CONTINUE16:%.*]] +// CHECK1-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 3) +// CHECK1-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK1-NEXT: br i1 [[TMP25]], label [[DOTCANCEL_EXIT15:%.*]], label [[DOTCANCEL_CONTINUE16:%.*]] // CHECK1: .cancel.exit15: // CHECK1-NEXT: br label [[CANCEL_EXIT23]] // CHECK1: .cancel.continue16: -// CHECK1-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 3) -// CHECK1-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK1-NEXT: br i1 [[TMP25]], label [[DOTCANCEL_EXIT17:%.*]], label [[DOTCANCEL_CONTINUE18:%.*]] +// CHECK1-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 3) +// CHECK1-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK1-NEXT: br i1 [[TMP27]], label [[DOTCANCEL_EXIT17:%.*]], label [[DOTCANCEL_CONTINUE18:%.*]] // CHECK1: .cancel.exit17: // CHECK1-NEXT: br label [[CANCEL_EXIT23]] // CHECK1: .cancel.continue18: @@ -212,8 +220,8 @@ // CHECK1: .omp.sections.exit19: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC20:%.*]] // CHECK1: omp.inner.for.inc20: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_7]], align 4 -// CHECK1-NEXT: [[INC21:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_7]], align 4 +// CHECK1-NEXT: [[INC21:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK1-NEXT: store i32 [[INC21]], i32* [[DOTOMP_SECTIONS_IV_7]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND8]] // CHECK1: omp.inner.for.end22: @@ -221,62 +229,62 @@ // CHECK1-NEXT: br label [[CANCEL_CONT24:%.*]] // CHECK1: cancel.cont24: // CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]]) -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP30]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB26:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB26]], i32* [[DOTCAPTURE_EXPR_25]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP27:%.*]] = icmp slt i32 0, [[TMP29]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP27:%.*]] = icmp slt i32 0, [[TMP31]] // CHECK1-NEXT: br i1 [[CMP27]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4 -// CHECK1-NEXT: store i32 [[TMP30]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4 +// CHECK1-NEXT: store i32 [[TMP32]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB5:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4 -// CHECK1-NEXT: [[CMP29:%.*]] = icmp sgt i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4 +// CHECK1-NEXT: [[CMP29:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]] // CHECK1-NEXT: br i1 [[CMP29]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP33]], [[COND_TRUE]] ], [ [[TMP34]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP35]], [[COND_TRUE]] ], [ [[TMP36]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP35]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP37]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND30:%.*]] // CHECK1: omp.inner.for.cond30: -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP31:%.*]] = icmp sle i32 [[TMP36]], [[TMP37]] +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP31:%.*]] = icmp sle i32 [[TMP38]], [[TMP39]] // CHECK1-NEXT: br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END39:%.*]] // CHECK1: omp.inner.for.body32: -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP38]], 1 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP40]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I28]], align 4 -// CHECK1-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 2) -// CHECK1-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 -// CHECK1-NEXT: br i1 [[TMP40]], label [[DOTCANCEL_EXIT33:%.*]], label [[DOTCANCEL_CONTINUE34:%.*]] +// CHECK1-NEXT: [[TMP41:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 2) +// CHECK1-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK1-NEXT: br i1 [[TMP42]], label [[DOTCANCEL_EXIT33:%.*]], label [[DOTCANCEL_CONTINUE34:%.*]] // CHECK1: .cancel.exit33: // CHECK1-NEXT: br label [[CANCEL_EXIT40:%.*]] // CHECK1: cancel.exit23: // CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[CANCEL_CONT24]] // CHECK1: .cancel.continue34: -// CHECK1-NEXT: [[TMP41:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 2) -// CHECK1-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 -// CHECK1-NEXT: br i1 [[TMP42]], label [[DOTCANCEL_EXIT35:%.*]], label [[DOTCANCEL_CONTINUE36:%.*]] +// CHECK1-NEXT: [[TMP43:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 2) +// CHECK1-NEXT: [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0 +// CHECK1-NEXT: br i1 [[TMP44]], label [[DOTCANCEL_EXIT35:%.*]], label [[DOTCANCEL_CONTINUE36:%.*]] // CHECK1: .cancel.exit35: // CHECK1-NEXT: br label [[CANCEL_EXIT40]] // CHECK1: .cancel.continue36: @@ -284,8 +292,8 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC37:%.*]] // CHECK1: omp.inner.for.inc37: -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD38:%.*]] = add nsw i32 [[TMP43]], 1 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD38:%.*]] = add nsw i32 [[TMP45]], 1 // CHECK1-NEXT: store i32 [[ADD38]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND30]] // CHECK1: omp.inner.for.end39: @@ -300,56 +308,59 @@ // CHECK1-NEXT: br label [[CANCEL_CONT41]] // CHECK1: cancel.cont41: // CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]]) -// CHECK1-NEXT: [[TMP44:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP44]] to %struct.kmp_task_t_with_privates* -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP45]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP47:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP44]]) -// CHECK1-NEXT: [[TMP48:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.1*)* @.omp_task_entry..3 to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP49:%.*]] = bitcast i8* [[TMP48]] to %struct.kmp_task_t_with_privates.1* -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], %struct.kmp_task_t_with_privates.1* [[TMP49]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP51:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP48]]) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*)) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]]) -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK1-NEXT: ret i32 [[TMP52]] +// CHECK1-NEXT: [[TMP46:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP47:%.*]] = bitcast i8* [[TMP46]] to %struct.kmp_task_t_with_privates* +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP47]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP49:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP46]]) +// CHECK1-NEXT: [[TMP50:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.2*)* @.omp_task_entry..3 to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP51:%.*]] = bitcast i8* [[TMP50]] to %struct.kmp_task_t_with_privates.2* +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP51]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP53:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP50]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_43]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_44]]) +// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_45]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[ARGC_ADDR]], i32** [[TMP54]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_45]]) +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 +// CHECK1-NEXT: ret i32 [[TMP55]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8*** noundef nonnull align 8 dereferenceable(8) [[ARGV:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca i8***, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i8*** [[ARGV]], i8**** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK1-NEXT: br i1 [[TMP5]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8***, i8**** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 1) +// CHECK1-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK1-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]]) // CHECK1-NEXT: br label [[RETURN:%.*]] // CHECK1: .cancel.continue: -// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 -// CHECK1-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT1:%.*]], label [[DOTCANCEL_CONTINUE2:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT1:%.*]], label [[DOTCANCEL_CONTINUE2:%.*]] // CHECK1: .cancel.exit1: -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]]) // CHECK1-NEXT: br label [[RETURN]] // CHECK1: .cancel.continue2: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[TMP10]] to i8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8**, i8*** [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[TMP13]] to i8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i8**, i8*** [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP14]], i64 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP15]], i64 0 // CHECK1-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1 // CHECK1-NEXT: br label [[RETURN]] // CHECK1: return: @@ -364,7 +375,7 @@ // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[CLEANUP_DEST_SLOT_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8 @@ -376,7 +387,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) @@ -387,8 +398,8 @@ // CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 // CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 4) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 @@ -412,27 +423,27 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..3 -// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.1* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.2* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[CLEANUP_DEST_SLOT_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.1*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.1* [[TMP1]], %struct.kmp_task_t_with_privates.1** [[DOTADDR1]], align 8 +// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP1]], %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.1*, %struct.kmp_task_t_with_privates.1** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], %struct.kmp_task_t_with_privates.1* [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.1* [[TMP3]] to i8* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.1* +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.2* [[TMP3]] to i8* // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) @@ -442,8 +453,8 @@ // CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !24 // CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !24 // CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !24 -// CHECK1-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: store %struct.anon.1* [[TMP8]], %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 // CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 4) #[[ATTR2]] // CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 @@ -460,10 +471,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -471,40 +483,42 @@ // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 0 -// CHECK1-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 0 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 0 +// CHECK1-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 0 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 3) -// CHECK1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK1-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 3) +// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: // CHECK1-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK1: .cancel.continue: -// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 3) -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTCANCEL_EXIT1:%.*]], label [[DOTCANCEL_CONTINUE2:%.*]] +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 3) +// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTCANCEL_EXIT1:%.*]], label [[DOTCANCEL_CONTINUE2:%.*]] // CHECK1: .cancel.exit1: // CHECK1-NEXT: br label [[CANCEL_EXIT]] // CHECK1: .cancel.continue2: @@ -512,25 +526,26 @@ // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK1: cancel.cont: // CHECK1-NEXT: ret void // CHECK1: cancel.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[CANCEL_CONT]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -538,49 +553,51 @@ // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1 -// CHECK1-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 1 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE3:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 3) -// CHECK1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK1-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 3) +// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: // CHECK1-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK1: .cancel.continue: -// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 3) -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTCANCEL_EXIT1:%.*]], label [[DOTCANCEL_CONTINUE2:%.*]] +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 3) +// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTCANCEL_EXIT1:%.*]], label [[DOTCANCEL_CONTINUE2:%.*]] // CHECK1: .cancel.exit1: // CHECK1-NEXT: br label [[CANCEL_EXIT]] // CHECK1: .cancel.continue2: // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK1: .omp.sections.case3: -// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 3) -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTCANCEL_EXIT4:%.*]], label [[DOTCANCEL_CONTINUE5:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 3) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTCANCEL_EXIT4:%.*]], label [[DOTCANCEL_CONTINUE5:%.*]] // CHECK1: .cancel.exit4: // CHECK1-NEXT: br label [[CANCEL_EXIT]] // CHECK1: .cancel.continue5: @@ -588,26 +605,26 @@ // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK1-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK1: cancel.cont: // CHECK1-NEXT: ret void // CHECK1: cancel.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[CANCEL_CONT]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -620,67 +637,69 @@ // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB5]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB5]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]], i32 2) -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]], i32 2) +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: // CHECK1-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK1: .cancel.continue: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]], i32 2) -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTCANCEL_EXIT6:%.*]], label [[DOTCANCEL_CONTINUE7:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]], i32 2) +// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTCANCEL_EXIT6:%.*]], label [[DOTCANCEL_CONTINUE7:%.*]] // CHECK1: .cancel.exit6: // CHECK1-NEXT: br label [[CANCEL_EXIT]] // CHECK1: .cancel.continue7: @@ -688,21 +707,21 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5]], i32 [[TMP25]]) -// CHECK1-NEXT: br label [[OMP_PRECOND_END]] -// CHECK1: cancel.exit: // CHECK1-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 // CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5]], i32 [[TMP27]]) +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: cancel.exit: +// CHECK1-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5]], i32 [[TMP29]]) // CHECK1-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK1: omp.precond.end: // CHECK1-NEXT: br label [[CANCEL_CONT]] diff --git a/clang/test/OpenMP/debug-info-complex-byval.cpp b/clang/test/OpenMP/debug-info-complex-byval.cpp --- a/clang/test/OpenMP/debug-info-complex-byval.cpp +++ b/clang/test/OpenMP/debug-info-complex-byval.cpp @@ -12,52 +12,40 @@ // CHECK1-LABEL: define {{[^@]+}}@_Z1av -// CHECK1-SAME: () #[[ATTR0:[0-9]+]] !dbg [[DBG7:![0-9]+]] { +// CHECK1-SAME: () #[[ATTR0:[0-9]+]] !dbg [[DBG6:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[B:%.*]] = alloca { float, float }, align 4 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK1-NEXT: call void @llvm.dbg.declare(metadata { float, float }* [[B]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG13:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load { float, float }, { float, float }* [[B]], align 4, !dbg [[DBG14:![0-9]+]] -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_CASTED]] to { float, float }*, !dbg [[DBG14]] -// CHECK1-NEXT: store { float, float } [[TMP0]], { float, float }* [[CONV]], align 4, !dbg [[DBG14]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[B_CASTED]], align 8, !dbg [[DBG14]] -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]]), !dbg [[DBG14]] -// CHECK1-NEXT: ret void, !dbg [[DBG15:![0-9]+]] -// -// -// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined._debug__ -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], <2 x float> noundef [[B_COERCE:%.*]]) #[[ATTR2:[0-9]+]] !dbg [[DBG16:![0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[B:%.*]] = alloca { float, float }, align 4 -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = bitcast { float, float }* [[B]] to <2 x float>* -// CHECK1-NEXT: store <2 x float> [[B_COERCE]], <2 x float>* [[TMP0]], align 4 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META24:![0-9]+]], metadata !DIExpression()), !dbg [[DBG25:![0-9]+]] -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META26:![0-9]+]], metadata !DIExpression()), !dbg [[DBG25]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata { float, float }* [[B]], metadata [[META27:![0-9]+]], metadata !DIExpression()), !dbg [[DBG28:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG29:![0-9]+]] +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG14:![0-9]+]] +// CHECK1-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[B]], i32 0, i32 0, !dbg [[DBG15:![0-9]+]] +// CHECK1-NEXT: [[B_REAL:%.*]] = load float, float* [[B_REALP]], align 4, !dbg [[DBG15]] +// CHECK1-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[B]], i32 0, i32 1, !dbg [[DBG15]] +// CHECK1-NEXT: [[B_IMAG:%.*]] = load float, float* [[B_IMAGP]], align 4, !dbg [[DBG15]] +// CHECK1-NEXT: [[DOTREALP:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[TMP0]], i32 0, i32 0, !dbg [[DBG14]] +// CHECK1-NEXT: [[DOTIMAGP:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[TMP0]], i32 0, i32 1, !dbg [[DBG14]] +// CHECK1-NEXT: store float [[B_REAL]], float* [[DOTREALP]], align 4, !dbg [[DBG14]] +// CHECK1-NEXT: store float [[B_IMAG]], float* [[DOTIMAGP]], align 4, !dbg [[DBG14]] +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG14]] +// CHECK1-NEXT: ret void, !dbg [[DBG17:![0-9]+]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[B:%.*]]) #[[ATTR3:[0-9]+]] !dbg [[DBG30:![0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] !dbg [[DBG18:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca { float, float }, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META34:![0-9]+]], metadata !DIExpression()), !dbg [[DBG35:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META30:![0-9]+]], metadata !DIExpression()), !dbg [[DBG31:![0-9]+]] // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META36:![0-9]+]], metadata !DIExpression()), !dbg [[DBG35]] -// CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i64* [[B_ADDR]], metadata [[META37:![0-9]+]], metadata !DIExpression()), !dbg [[DBG35]] -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to { float, float }*, !dbg [[DBG38:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG38]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG38]] -// CHECK1-NEXT: [[TMP2:%.*]] = bitcast { float, float }* [[CONV]] to <2 x float>*, !dbg [[DBG38]] -// CHECK1-NEXT: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[TMP2]], align 4, !dbg [[DBG38]] -// CHECK1-NEXT: call void @.omp_outlined._debug__(i32* [[TMP0]], i32* [[TMP1]], <2 x float> [[TMP3]]) #[[ATTR4:[0-9]+]], !dbg [[DBG38]] -// CHECK1-NEXT: ret void, !dbg [[DBG38]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META32:![0-9]+]], metadata !DIExpression()), !dbg [[DBG31]] +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata %struct.anon** [[__CONTEXT_ADDR]], metadata [[META33:![0-9]+]], metadata !DIExpression()), !dbg [[DBG31]] +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8, !dbg [[DBG34:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0, !dbg [[DBG34]] +// CHECK1-NEXT: [[TMP2:%.*]] = load { float, float }, { float, float }* [[TMP1]], align 4, !dbg [[DBG34]] +// CHECK1-NEXT: store { float, float } [[TMP2]], { float, float }* [[B]], align 4, !dbg [[DBG34]] +// CHECK1-NEXT: ret void, !dbg [[DBG35:![0-9]+]] // diff --git a/clang/test/OpenMP/debug-info-openmp-array.cpp b/clang/test/OpenMP/debug-info-openmp-array.cpp --- a/clang/test/OpenMP/debug-info-openmp-array.cpp +++ b/clang/test/OpenMP/debug-info-openmp-array.cpp @@ -20,31 +20,36 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[M_ADDR]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG12:![0-9]+]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[I]], metadata [[META13:![0-9]+]], metadata !DIExpression()), !dbg [[DBG14:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[M_ADDR]], align 4, !dbg [[DBG15:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64, !dbg [[DBG16:![0-9]+]] -// CHECK1-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave(), !dbg [[DBG16]] -// CHECK1-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8, !dbg [[DBG16]] -// CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16, !dbg [[DBG16]] -// CHECK1-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8, !dbg [[DBG16]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i64* [[__VLA_EXPR0]], metadata [[META17:![0-9]+]], metadata !DIExpression()), !dbg [[DBG19:![0-9]+]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[VLA]], metadata [[META20:![0-9]+]], metadata !DIExpression()), !dbg [[DBG24:![0-9]+]] -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[M_ADDR]], i64 [[TMP1]], i32* [[VLA]]), !dbg [[DBG25:![0-9]+]] -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8, !dbg [[DBG26:![0-9]+]] -// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP3]]), !dbg [[DBG26]] -// CHECK1-NEXT: ret void, !dbg [[DBG26]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[M_ADDR]], metadata [[META12:![0-9]+]], metadata !DIExpression()), !dbg [[DBG13:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[I]], metadata [[META14:![0-9]+]], metadata !DIExpression()), !dbg [[DBG15:![0-9]+]] +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[M_ADDR]], align 4, !dbg [[DBG16:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64, !dbg [[DBG17:![0-9]+]] +// CHECK1-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave(), !dbg [[DBG17]] +// CHECK1-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8, !dbg [[DBG17]] +// CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16, !dbg [[DBG17]] +// CHECK1-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8, !dbg [[DBG17]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i64* [[__VLA_EXPR0]], metadata [[META18:![0-9]+]], metadata !DIExpression()), !dbg [[DBG20:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[VLA]], metadata [[META21:![0-9]+]], metadata !DIExpression()), !dbg [[DBG25:![0-9]+]] +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG26:![0-9]+]] +// CHECK1-NEXT: store i32* [[M_ADDR]], i32** [[TMP3]], align 8, !dbg [[DBG26]] +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG26]] +// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP4]], align 8, !dbg [[DBG26]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG26]] +// CHECK1-NEXT: store i32* [[VLA]], i32** [[TMP5]], align 8, !dbg [[DBG26]] +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG26]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8, !dbg [[DBG27:![0-9]+]] +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP6]]), !dbg [[DBG27]] +// CHECK1-NEXT: ret void, !dbg [[DBG27]] // // -// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined._debug__ -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CEN:%.*]]) #[[ATTR3:[0-9]+]] !dbg [[DBG27:![0-9]+]] { +// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] !dbg [[DBG28:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[M_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CEN_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -56,123 +61,93 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META35:![0-9]+]], metadata !DIExpression()), !dbg [[DBG36:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META39:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40:![0-9]+]] // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META37:![0-9]+]], metadata !DIExpression()), !dbg [[DBG36]] -// CHECK1-NEXT: store i32* [[M]], i32** [[M_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[M_ADDR]], metadata [[META38:![0-9]+]], metadata !DIExpression()), !dbg [[DBG39:![0-9]+]] -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META40:![0-9]+]], metadata !DIExpression()), !dbg [[DBG36]] -// CHECK1-NEXT: store i32* [[CEN]], i32** [[CEN_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[CEN_ADDR]], metadata [[META41:![0-9]+]], metadata !DIExpression()), !dbg [[DBG42:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[M_ADDR]], align 8, !dbg [[DBG43:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG43]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[CEN_ADDR]], align 8, !dbg [[DBG43]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_IV]], metadata [[META44:![0-9]+]], metadata !DIExpression()), !dbg [[DBG36]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTCAPTURE_EXPR_]], metadata [[META45:![0-9]+]], metadata !DIExpression()), !dbg [[DBG36]] -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4, !dbg [[DBG46:![0-9]+]] -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4, !dbg [[DBG46]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTCAPTURE_EXPR_1]], metadata [[META45]], metadata !DIExpression()), !dbg [[DBG36]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !dbg [[DBG46]] -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0, !dbg [[DBG43]] -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1, !dbg [[DBG43]] -// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1, !dbg [[DBG43]] -// CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4, !dbg [[DBG43]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[I]], metadata [[META47:![0-9]+]], metadata !DIExpression()), !dbg [[DBG36]] -// CHECK1-NEXT: store i32 0, i32* [[I]], align 4, !dbg [[DBG48:![0-9]+]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !dbg [[DBG46]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]], !dbg [[DBG43]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META41:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40]] +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata %struct.anon** [[__CONTEXT_ADDR]], metadata [[META42:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40]] +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8, !dbg [[DBG43:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0, !dbg [[DBG43]] +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8, !dbg [[DBG43]] +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1, !dbg [[DBG43]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8, !dbg [[DBG43]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2, !dbg [[DBG43]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8, !dbg [[DBG43]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_IV]], metadata [[META44:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTCAPTURE_EXPR_]], metadata [[META45:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4, !dbg [[DBG46:![0-9]+]] +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4, !dbg [[DBG46]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTCAPTURE_EXPR_1]], metadata [[META45]], metadata !DIExpression()), !dbg [[DBG40]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !dbg [[DBG46]] +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0, !dbg [[DBG47:![0-9]+]] +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1, !dbg [[DBG47]] +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1, !dbg [[DBG47]] +// CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4, !dbg [[DBG47]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[I]], metadata [[META48:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40]] +// CHECK1-NEXT: store i32 0, i32* [[I]], align 4, !dbg [[DBG49:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !dbg [[DBG46]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]], !dbg [[DBG47]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]], !dbg [[DBG43]] // CHECK1: omp.precond.then: -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_LB]], metadata [[META49:![0-9]+]], metadata !DIExpression()), !dbg [[DBG36]] -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG50:![0-9]+]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_UB]], metadata [[META51:![0-9]+]], metadata !DIExpression()), !dbg [[DBG36]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !dbg [[DBG43]] -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG50]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_STRIDE]], metadata [[META52:![0-9]+]], metadata !DIExpression()), !dbg [[DBG36]] -// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG50]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_IS_LAST]], metadata [[META53:![0-9]+]], metadata !DIExpression()), !dbg [[DBG36]] -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG50]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[I3]], metadata [[META47]], metadata !DIExpression()), !dbg [[DBG36]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG43]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4, !dbg [[DBG43]] -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG54:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG50]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !dbg [[DBG43]] -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]], !dbg [[DBG50]] -// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG50]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_LB]], metadata [[META50:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40]] +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG51:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_UB]], metadata [[META52:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !dbg [[DBG47]] +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG51]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_STRIDE]], metadata [[META53:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40]] +// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG51]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_IS_LAST]], metadata [[META54:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40]] +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG51]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[I3]], metadata [[META48]], metadata !DIExpression()), !dbg [[DBG40]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG43]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4, !dbg [[DBG43]] +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG43]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG51]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !dbg [[DBG47]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]], !dbg [[DBG51]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG51]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !dbg [[DBG43]] -// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG50]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !dbg [[DBG47]] +// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG51]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG50]] -// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG50]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG51]] +// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG51]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ], !dbg [[DBG50]] -// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG50]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG50]] -// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG50]] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ], !dbg [[DBG51]] +// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG51]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG51]] +// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG51]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG43]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG50]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG50]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]], !dbg [[DBG43]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG51]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG51]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]], !dbg [[DBG47]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG43]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG50]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1, !dbg [[DBG48]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG48]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !dbg [[DBG48]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4, !dbg [[DBG55:![0-9]+]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4, !dbg [[DBG57:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64, !dbg [[DBG58:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]], !dbg [[DBG58]] -// CHECK1-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4, !dbg [[DBG59:![0-9]+]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG51]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1, !dbg [[DBG49]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG49]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !dbg [[DBG49]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !dbg [[DBG55:![0-9]+]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I3]], align 4, !dbg [[DBG57:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG58:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i64 [[IDXPROM]], !dbg [[DBG58]] +// CHECK1-NEXT: store i32 [[TMP21]], i32* [[ARRAYIDX]], align 4, !dbg [[DBG59:![0-9]+]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG60:![0-9]+]] // CHECK1: omp.body.continue: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG54]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG43]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG50]] -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1, !dbg [[DBG43]] -// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG43]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG54]], !llvm.loop [[LOOP61:![0-9]+]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG51]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], 1, !dbg [[DBG47]] +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG47]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG43]], !llvm.loop [[LOOP61:![0-9]+]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG54]] +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG43]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG54]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4, !dbg [[DBG54]] -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP21]]), !dbg [[DBG62:![0-9]+]] -// CHECK1-NEXT: br label [[OMP_PRECOND_END]], !dbg [[DBG54]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG43]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4, !dbg [[DBG43]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP25]]), !dbg [[DBG62:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_PRECOND_END]], !dbg [[DBG43]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void, !dbg [[DBG63:![0-9]+]] // -// -// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CEN:%.*]]) #[[ATTR3]] !dbg [[DBG64:![0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[M_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CEN_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META65:![0-9]+]], metadata !DIExpression()), !dbg [[DBG66:![0-9]+]] -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META67:![0-9]+]], metadata !DIExpression()), !dbg [[DBG66]] -// CHECK1-NEXT: store i32* [[M]], i32** [[M_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[M_ADDR]], metadata [[META68:![0-9]+]], metadata !DIExpression()), !dbg [[DBG66]] -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META69:![0-9]+]], metadata !DIExpression()), !dbg [[DBG66]] -// CHECK1-NEXT: store i32* [[CEN]], i32** [[CEN_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[CEN_ADDR]], metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG66]] -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[M_ADDR]], align 8, !dbg [[DBG71:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG71]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[CEN_ADDR]], align 8, !dbg [[DBG71]] -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG71]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG71]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[M_ADDR]], align 8, !dbg [[DBG71]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[CEN_ADDR]], align 8, !dbg [[DBG71]] -// CHECK1-NEXT: call void @.omp_outlined._debug__(i32* [[TMP3]], i32* [[TMP4]], i32* [[TMP5]], i64 [[TMP1]], i32* [[TMP6]]) #[[ATTR4:[0-9]+]], !dbg [[DBG71]] -// CHECK1-NEXT: ret void, !dbg [[DBG71]] -// -// \ No newline at end of file diff --git a/clang/test/OpenMP/debug_threadprivate_copyin.c b/clang/test/OpenMP/debug_threadprivate_copyin.c --- a/clang/test/OpenMP/debug_threadprivate_copyin.c +++ b/clang/test/OpenMP/debug_threadprivate_copyin.c @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // This testcase checks emission of debug info for threadprivate variables // present in any clause of OpenMP construct. @@ -6,13 +7,6 @@ // RUN: %clang_cc1 -debug-info-kind=constructor -x c -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s // expected-no-diagnostics -// CHECK: define internal void @.omp_outlined._debug__( -// CHECK: call void @llvm.dbg.declare(metadata ptr %.global_tid..addr, -// CHECK: call void @llvm.dbg.declare(metadata ptr %.bound_tid..addr, -// CHECK: call void @llvm.dbg.declare(metadata ptr %nt.addr -// CHECK: store ptr %gbl_dynamic_int, ptr %gbl_dynamic_int.addr, align 8 -// CHECK-NOT: call void @llvm.dbg.declare(metadata ptr %gbl_dynamic_int.addr -// CHECK-NOT: call void @llvm.dbg.declare(metadata ptr %gbl_static_int.addr extern int printf(const char *, ...); extern void omp_set_num_threads(int); @@ -24,6 +18,29 @@ #pragma omp threadprivate(gbl_dynamic_int) +// CHECK-LABEL: @main( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[NT:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[OFFSET:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr [[NT]], metadata [[META23:![0-9]+]], metadata !DIExpression()), !dbg [[DBG24:![0-9]+]] +// CHECK-NEXT: store i32 0, ptr [[NT]], align 4, !dbg [[DBG24]] +// CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr [[OFFSET]], metadata [[META25:![0-9]+]], metadata !DIExpression()), !dbg [[DBG26:![0-9]+]] +// CHECK-NEXT: store i32 10, ptr [[OFFSET]], align 4, !dbg [[DBG26]] +// CHECK-NEXT: store i32 55, ptr @gbl_dynamic_int, align 4, !dbg [[DBG27:![0-9]+]] +// CHECK-NEXT: store i32 77, ptr @gbl_static_int, align 4, !dbg [[DBG28:![0-9]+]] +// CHECK-NEXT: call void @omp_set_num_threads(i32 noundef 4), !dbg [[DBG29:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG30:![0-9]+]] +// CHECK-NEXT: store ptr [[NT]], ptr [[TMP0]], align 8, !dbg [[DBG30]] +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG30]] +// CHECK-NEXT: store ptr @gbl_dynamic_int, ptr [[TMP1]], align 8, !dbg [[DBG30]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG30]] +// CHECK-NEXT: store ptr @gbl_static_int, ptr [[TMP2]], align 8, !dbg [[DBG30]] +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG30]] +// CHECK-NEXT: ret i32 0, !dbg [[DBG31:![0-9]+]] +// int main() { int nt = 0; int offset = 10; diff --git a/clang/test/OpenMP/declare_target_codegen_globalization.cpp b/clang/test/OpenMP/declare_target_codegen_globalization.cpp --- a/clang/test/OpenMP/declare_target_codegen_globalization.cpp +++ b/clang/test/OpenMP/declare_target_codegen_globalization.cpp @@ -28,7 +28,8 @@ // CHECK1-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) @@ -36,11 +37,16 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP5]], i64 1) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[TMP0]], i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast %struct.anon* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK1-NEXT: [[TMP5:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP4]], i8* [[TMP5]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to %struct.anon* +// CHECK1-NEXT: store [[STRUCT_ANON]] [[TMP7]], %struct.anon* [[TMP8]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon*)* @__omp_outlined__ to i8*), i8* null, i8* [[TMP6]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP5]], i64 8) // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -48,20 +54,22 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooRi(i32* noundef nonnull align 4 dereferenceable(4) [[B]]) #[[ATTR6:[0-9]+]] -// CHECK1-NEXT: [[CALL1:%.*]] = call noundef i32 @_Z3barv() #[[ATTR6]] +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooRi(i32* noundef nonnull align 4 dereferenceable(4) [[B]]) #[[ATTR7:[0-9]+]] +// CHECK1-NEXT: [[CALL1:%.*]] = call noundef i32 @_Z3barv() #[[ATTR7]] // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL1]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -80,7 +88,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[A_ON_STACK:%.*]] = bitcast i8* [[A]] to i32* -// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooRi(i32* noundef nonnull align 4 dereferenceable(4) [[A_ON_STACK]]) #[[ATTR6]] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooRi(i32* noundef nonnull align 4 dereferenceable(4) [[A_ON_STACK]]) #[[ATTR7]] // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[A]], i64 4) // CHECK1-NEXT: ret i32 [[CALL]] // diff --git a/clang/test/OpenMP/declare_variant_construct_codegen_1.c b/clang/test/OpenMP/declare_variant_construct_codegen_1.c --- a/clang/test/OpenMP/declare_variant_construct_codegen_1.c +++ b/clang/test/OpenMP/declare_variant_construct_codegen_1.c @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ // expected-no-diagnostics #ifndef HEADER @@ -31,13 +32,11 @@ void vxv(int *v1, int *v2, int *v3, int n) { for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i]; } -// CK1: define dso_local void @vxv void p_vxv(int *v1, int *v2, int *v3, int n) { #pragma omp for for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i] * 3; } -// CK1: define dso_local void @p_vxv #pragma omp declare target void t_vxv(int *v1, int *v2, int *v3, int n) { @@ -45,10 +44,8 @@ for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i] * 2; } #pragma omp end declare target -// CK1: define dso_local void @t_vxv -// CK1-LABEL: define {{[^@]+}}@test int test(void) { int v1[N], v2[N], v3[N]; @@ -63,27 +60,18 @@ { vxv(v1, v2, v3, N); } -// CK1: call void @__omp_offloading_[[OFFLOAD:.+]]({{.+}}) vxv(v1, v2, v3, N); -// CK1: call void @vxv #pragma omp parallel { vxv(v1, v2, v3, N); } -// CK1: call void ({{.+}}) @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 3, void ({{.+}})* bitcast (void (i32*, i32*, [100 x i32]*, [100 x i32]*, [100 x i32]*)* [[PARALLEL_REGION:@.+]] to void return 0; } -// CK1: define internal void @__omp_offloading_[[OFFLOAD]]({{.+}}) -// CK1: call void ({{.+}}) @__kmpc_fork_teams(%struct.ident_t* {{.+}}, i32 3, void ({{.+}})* bitcast (void (i32*, i32*, [100 x i32]*, [100 x i32]*, [100 x i32]*)* [[TARGET_REGION:@.+]] to void -// CK1: define internal void [[TARGET_REGION]]( -// CK1: call void @t_vxv -// CK1: define internal void [[PARALLEL_REGION]]( -// CK1: call void @p_vxv #endif // CK1 // RUN: %clang_cc1 -no-opaque-pointers -DCK2 -verify -fopenmp -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --check-prefix=CK2 @@ -146,7 +134,6 @@ v3[i][j][k] = v1[i][j][k] * v2[i][j][k]; } -// CK2-LABEL: define {{[^@]+}}@test void test(int ***v1, int ***v2, int ***v3, int n) { int i; @@ -155,31 +142,20 @@ { test_base(v1, v2, v3, 0); } -// CK2: call void @__omp_offloading_[[OFFLOAD_1:.+]]({{.+}}) #pragma omp target { test_base(v1, v2, v3, 0); } -// CK2: call void @__omp_offloading_[[OFFLOAD_2:.+]]({{.+}}) #pragma omp parallel { test_base(v1, v2, v3, 0); } -// CK2: call void ({{.+}}) @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32****, i32****, i32****)* [[PARALLEL_REGION:@.+]] to void } -// CK2: define internal void @__omp_offloading_[[OFFLOAD_1]]({{.+}}) -// CK2: call void ({{.+}}) @__kmpc_fork_teams(%struct.ident_t* {{.+}}, i32 3, void ({{.+}})* bitcast (void (i32*, i32*, i32****, i32****, i32****)* [[TARGET_REGION_1:@.+]] to void -// CK2: define internal void [[TARGET_REGION_1]]( -// CK2: call void @test_teams -// CK2: define internal void @__omp_offloading_[[OFFLOAD_2]]({{.+}}) -// CK2: call void @test_target -// CK2: define internal void [[PARALLEL_REGION]]( -// CK2: call void @test_parallel #endif // CK2 @@ -221,7 +197,6 @@ return v1[idx] * v2[idx]; } -// CK3-LABEL: define {{[^@]+}}@test void test(void) { int v1[N], v2[N], v3[N]; @@ -236,14 +211,12 @@ for (int i = 0; i < N; i++) { v3[i] = t(v1, v2, v3, i); } -// CK3: call = call i32 @t_simd #pragma omp for for (int i = 0; i < N; i++) { v3[i] = t(v1, v2, v3, i); } -// CK3: call{{.+}} = call i32 @t_for } #endif // CK3 @@ -295,7 +268,6 @@ } #pragma omp end declare target -// CK4-LABEL: define {{[^@]+}}@test void test(void) { int v1[N], v2[N], v3[N]; @@ -312,23 +284,736 @@ for (int i = 0; i < N; i++) vxv(v1, v2, v3, N); } -// CK4: call void @__omp_offloading_[[OFFLOAD_1:.+]]({{.+}}) #pragma omp simd for (int i = 0; i < N; i++) vxv(v1, v2, v3, N); -// CK4: call void @vxv #pragma omp target teams distribute parallel for simd map(from: v3[:N]) for (int i = 0; i < N; i++) for (int i = 0; i < N; i++) for (int i = 0; i < N; i++) vxv(v1, v2, v3, N); -// CK4: call void @__omp_offloading_[[OFFLOAD_2:.+]]({{.+}}) } -// CK4-DAG: call void @all_vxv -// CK4-DAG: call void @combined_vxv #endif // CK4 #endif // HEADER +// CK1-LABEL: define {{[^@]+}}@p_vxv +// CK1-SAME: (i32* noundef [[V1:%.*]], i32* noundef [[V2:%.*]], i32* noundef [[V3:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] { +// CK1-NEXT: entry: +// CK1-NEXT: [[V1_ADDR:%.*]] = alloca i32*, align 8 +// CK1-NEXT: [[V2_ADDR:%.*]] = alloca i32*, align 8 +// CK1-NEXT: [[V3_ADDR:%.*]] = alloca i32*, align 8 +// CK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) +// CK1-NEXT: store i32* [[V1]], i32** [[V1_ADDR]], align 8 +// CK1-NEXT: store i32* [[V2]], i32** [[V2_ADDR]], align 8 +// CK1-NEXT: store i32* [[V3]], i32** [[V3_ADDR]], align 8 +// CK1-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CK1-NEXT: store i32 0, i32* [[I]], align 4 +// CK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CK1: omp.precond.then: +// CK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CK1: cond.true: +// CK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CK1-NEXT: br label [[COND_END:%.*]] +// CK1: cond.false: +// CK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CK1-NEXT: br label [[COND_END]] +// CK1: cond.end: +// CK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP7]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CK1: omp.inner.for.cond: +// CK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CK1: omp.inner.for.body: +// CK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CK1-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// CK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[V1_ADDR]], align 8 +// CK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[I3]], align 4 +// CK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 +// CK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i64 [[IDXPROM]] +// CK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[V2_ADDR]], align 8 +// CK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4 +// CK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP17]] to i64 +// CK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[TMP16]], i64 [[IDXPROM6]] +// CK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4 +// CK1-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP15]], [[TMP18]] +// CK1-NEXT: [[MUL9:%.*]] = mul nsw i32 [[MUL8]], 3 +// CK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[V3_ADDR]], align 8 +// CK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4 +// CK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP20]] to i64 +// CK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i64 [[IDXPROM10]] +// CK1-NEXT: store i32 [[MUL9]], i32* [[ARRAYIDX11]], align 4 +// CK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CK1: omp.body.continue: +// CK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CK1: omp.inner.for.inc: +// CK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP21]], 1 +// CK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 +// CK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CK1: omp.inner.for.end: +// CK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CK1: omp.loop.exit: +// CK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +// CK1-NEXT: br label [[OMP_PRECOND_END]] +// CK1: omp.precond.end: +// CK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP0]]) +// CK1-NEXT: ret void +// +// +// +// +// CK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_test_l62 +// CK1-SAME: ([100 x i32]* noundef nonnull align 4 dereferenceable(400) [[V1:%.*]], [100 x i32]* noundef nonnull align 4 dereferenceable(400) [[V2:%.*]], [100 x i32]* noundef nonnull align 4 dereferenceable(400) [[V3:%.*]]) #[[ATTR3:[0-9]+]] { +// CK1-NEXT: entry: +// CK1-NEXT: [[V1_ADDR:%.*]] = alloca [100 x i32]*, align 8 +// CK1-NEXT: [[V2_ADDR:%.*]] = alloca [100 x i32]*, align 8 +// CK1-NEXT: [[V3_ADDR:%.*]] = alloca [100 x i32]*, align 8 +// CK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CK1-NEXT: store [100 x i32]* [[V1]], [100 x i32]** [[V1_ADDR]], align 8 +// CK1-NEXT: store [100 x i32]* [[V2]], [100 x i32]** [[V2_ADDR]], align 8 +// CK1-NEXT: store [100 x i32]* [[V3]], [100 x i32]** [[V3_ADDR]], align 8 +// CK1-NEXT: [[TMP0:%.*]] = load [100 x i32]*, [100 x i32]** [[V1_ADDR]], align 8 +// CK1-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[V2_ADDR]], align 8 +// CK1-NEXT: [[TMP2:%.*]] = load [100 x i32]*, [100 x i32]** [[V3_ADDR]], align 8 +// CK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CK1-NEXT: store [100 x i32]* [[TMP0]], [100 x i32]** [[TMP3]], align 8 +// CK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CK1-NEXT: store [100 x i32]* [[TMP1]], [100 x i32]** [[TMP4]], align 8 +// CK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CK1-NEXT: store [100 x i32]* [[TMP2]], [100 x i32]** [[TMP5]], align 8 +// CK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) +// CK1-NEXT: ret void +// +// +// CK1-LABEL: define {{[^@]+}}@.omp_outlined. +// CK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { +// CK1-NEXT: entry: +// CK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CK1-NEXT: [[TMP2:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP1]], align 8 +// CK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CK1-NEXT: [[TMP4:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP3]], align 8 +// CK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CK1-NEXT: [[TMP6:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP5]], align 8 +// CK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP2]], i64 0, i64 0 +// CK1-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP4]], i64 0, i64 0 +// CK1-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP6]], i64 0, i64 0 +// CK1-NEXT: call void @t_vxv(i32* noundef [[ARRAYDECAY]], i32* noundef [[ARRAYDECAY1]], i32* noundef [[ARRAYDECAY2]], i32 noundef 100) +// CK1-NEXT: ret void +// +// +// CK1-LABEL: define {{[^@]+}}@.omp_outlined..1 +// CK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { +// CK1-NEXT: entry: +// CK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CK1-NEXT: [[TMP2:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP1]], align 8 +// CK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CK1-NEXT: [[TMP4:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP3]], align 8 +// CK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CK1-NEXT: [[TMP6:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP5]], align 8 +// CK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP2]], i64 0, i64 0 +// CK1-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP4]], i64 0, i64 0 +// CK1-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP6]], i64 0, i64 0 +// CK1-NEXT: call void @p_vxv(i32* noundef [[ARRAYDECAY]], i32* noundef [[ARRAYDECAY1]], i32* noundef [[ARRAYDECAY2]], i32 noundef 100) +// CK1-NEXT: ret void +// +// +// +// +// +// CK1-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CK1-SAME: () #[[ATTR4:[0-9]+]] section ".text.startup" { +// CK1-NEXT: entry: +// CK1-NEXT: call void @__tgt_register_requires(i64 1) +// CK1-NEXT: ret void +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// CK2-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CK2-SAME: () #[[ATTR4:[0-9]+]] section ".text.startup" { +// CK2-NEXT: entry: +// CK2-NEXT: call void @__tgt_register_requires(i64 1) +// CK2-NEXT: ret void +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// CK3-LABEL: define {{[^@]+}}@t +// CK3-SAME: (i32* noundef [[V1:%.*]], i32* noundef [[V2:%.*]], i32* noundef [[V3:%.*]], i32 noundef [[IDX:%.*]]) #[[ATTR0:[0-9]+]] { +// CK3-NEXT: entry: +// CK3-NEXT: [[V1_ADDR:%.*]] = alloca i32*, align 8 +// CK3-NEXT: [[V2_ADDR:%.*]] = alloca i32*, align 8 +// CK3-NEXT: [[V3_ADDR:%.*]] = alloca i32*, align 8 +// CK3-NEXT: [[IDX_ADDR:%.*]] = alloca i32, align 4 +// CK3-NEXT: store i32* [[V1]], i32** [[V1_ADDR]], align 8 +// CK3-NEXT: store i32* [[V2]], i32** [[V2_ADDR]], align 8 +// CK3-NEXT: store i32* [[V3]], i32** [[V3_ADDR]], align 8 +// CK3-NEXT: store i32 [[IDX]], i32* [[IDX_ADDR]], align 4 +// CK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[V1_ADDR]], align 8 +// CK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[IDX_ADDR]], align 4 +// CK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64 +// CK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 [[IDXPROM]] +// CK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[V2_ADDR]], align 8 +// CK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[IDX_ADDR]], align 4 +// CK3-NEXT: [[IDXPROM1:%.*]] = sext i32 [[TMP4]] to i64 +// CK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i64 [[IDXPROM1]] +// CK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 +// CK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], [[TMP5]] +// CK3-NEXT: ret i32 [[MUL]] +// +// +// CK3-LABEL: define {{[^@]+}}@t_for +// CK3-SAME: (i32* noundef [[V1:%.*]], i32* noundef [[V2:%.*]], i32* noundef [[V3:%.*]], i32 noundef [[IDX:%.*]]) #[[ATTR0]] { +// CK3-NEXT: entry: +// CK3-NEXT: [[V1_ADDR:%.*]] = alloca i32*, align 8 +// CK3-NEXT: [[V2_ADDR:%.*]] = alloca i32*, align 8 +// CK3-NEXT: [[V3_ADDR:%.*]] = alloca i32*, align 8 +// CK3-NEXT: [[IDX_ADDR:%.*]] = alloca i32, align 4 +// CK3-NEXT: store i32* [[V1]], i32** [[V1_ADDR]], align 8 +// CK3-NEXT: store i32* [[V2]], i32** [[V2_ADDR]], align 8 +// CK3-NEXT: store i32* [[V3]], i32** [[V3_ADDR]], align 8 +// CK3-NEXT: store i32 [[IDX]], i32* [[IDX_ADDR]], align 4 +// CK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[V1_ADDR]], align 8 +// CK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[IDX_ADDR]], align 4 +// CK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64 +// CK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 [[IDXPROM]] +// CK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[V2_ADDR]], align 8 +// CK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[IDX_ADDR]], align 4 +// CK3-NEXT: [[IDXPROM1:%.*]] = sext i32 [[TMP4]] to i64 +// CK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i64 [[IDXPROM1]] +// CK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 +// CK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], [[TMP5]] +// CK3-NEXT: ret i32 [[MUL]] +// +// +// CK3-LABEL: define {{[^@]+}}@t_simd +// CK3-SAME: (i32* noundef [[V1:%.*]], i32* noundef [[V2:%.*]], i32* noundef [[V3:%.*]], i32 noundef [[IDX:%.*]]) #[[ATTR1:[0-9]+]] { +// CK3-NEXT: entry: +// CK3-NEXT: [[V1_ADDR:%.*]] = alloca i32*, align 8 +// CK3-NEXT: [[V2_ADDR:%.*]] = alloca i32*, align 8 +// CK3-NEXT: [[V3_ADDR:%.*]] = alloca i32*, align 8 +// CK3-NEXT: [[IDX_ADDR:%.*]] = alloca i32, align 4 +// CK3-NEXT: store i32* [[V1]], i32** [[V1_ADDR]], align 8 +// CK3-NEXT: store i32* [[V2]], i32** [[V2_ADDR]], align 8 +// CK3-NEXT: store i32* [[V3]], i32** [[V3_ADDR]], align 8 +// CK3-NEXT: store i32 [[IDX]], i32* [[IDX_ADDR]], align 4 +// CK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[V1_ADDR]], align 8 +// CK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[IDX_ADDR]], align 4 +// CK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64 +// CK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 [[IDXPROM]] +// CK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[V2_ADDR]], align 8 +// CK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[IDX_ADDR]], align 4 +// CK3-NEXT: [[IDXPROM1:%.*]] = sext i32 [[TMP4]] to i64 +// CK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i64 [[IDXPROM1]] +// CK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 +// CK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], [[TMP5]] +// CK3-NEXT: ret i32 [[MUL]] +// +// +// CK3-LABEL: define {{[^@]+}}@test +// CK3-SAME: () #[[ATTR0]] { +// CK3-NEXT: entry: +// CK3-NEXT: [[V1:%.*]] = alloca [100 x i32], align 16 +// CK3-NEXT: [[V2:%.*]] = alloca [100 x i32], align 16 +// CK3-NEXT: [[V3:%.*]] = alloca [100 x i32], align 16 +// CK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CK3-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CK3-NEXT: [[DOTOMP_IV14:%.*]] = alloca i32, align 4 +// CK3-NEXT: [[_TMP15:%.*]] = alloca i32, align 4 +// CK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CK3-NEXT: [[I16:%.*]] = alloca i32, align 4 +// CK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) +// CK3-NEXT: store i32 0, i32* [[I]], align 4 +// CK3-NEXT: br label [[FOR_COND:%.*]] +// CK3: for.cond: +// CK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[I]], align 4 +// CK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 100 +// CK3-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CK3: for.body: +// CK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[I]], align 4 +// CK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 +// CK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[I]], align 4 +// CK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP3]] to i64 +// CK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[V1]], i64 0, i64 [[IDXPROM]] +// CK3-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX]], align 4 +// CK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[I]], align 4 +// CK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 +// CK3-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[ADD1]] +// CK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[I]], align 4 +// CK3-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP5]] to i64 +// CK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[V2]], i64 0, i64 [[IDXPROM2]] +// CK3-NEXT: store i32 [[SUB]], i32* [[ARRAYIDX3]], align 4 +// CK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[I]], align 4 +// CK3-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP6]] to i64 +// CK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[V3]], i64 0, i64 [[IDXPROM4]] +// CK3-NEXT: store i32 0, i32* [[ARRAYIDX5]], align 4 +// CK3-NEXT: br label [[FOR_INC:%.*]] +// CK3: for.inc: +// CK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[I]], align 4 +// CK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// CK3-NEXT: store i32 [[INC]], i32* [[I]], align 4 +// CK3-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CK3: for.end: +// CK3-NEXT: store i32 0, i32* [[DOTOMP_IV]], align 4 +// CK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CK3: omp.inner.for.cond: +// CK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CK3-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP8]], 100 +// CK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CK3: omp.inner.for.body: +// CK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CK3-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] +// CK3-NEXT: store i32 [[ADD8]], i32* [[I6]], align 4, !llvm.access.group !5 +// CK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[V1]], i64 0, i64 0 +// CK3-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[V2]], i64 0, i64 0 +// CK3-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[V3]], i64 0, i64 0 +// CK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !5 +// CK3-NEXT: [[CALL:%.*]] = call i32 @t_simd(i32* noundef [[ARRAYDECAY]], i32* noundef [[ARRAYDECAY9]], i32* noundef [[ARRAYDECAY10]], i32 noundef [[TMP10]]), !llvm.access.group !5 +// CK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !5 +// CK3-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP11]] to i64 +// CK3-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[V3]], i64 0, i64 [[IDXPROM11]] +// CK3-NEXT: store i32 [[CALL]], i32* [[ARRAYIDX12]], align 4, !llvm.access.group !5 +// CK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CK3: omp.body.continue: +// CK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CK3: omp.inner.for.inc: +// CK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP12]], 1 +// CK3-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CK3: omp.inner.for.end: +// CK3-NEXT: store i32 100, i32* [[I6]], align 4 +// CK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 +// CK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CK3-NEXT: [[CMP17:%.*]] = icmp sgt i32 [[TMP13]], 99 +// CK3-NEXT: br i1 [[CMP17]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CK3: cond.true: +// CK3-NEXT: br label [[COND_END:%.*]] +// CK3: cond.false: +// CK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CK3-NEXT: br label [[COND_END]] +// CK3: cond.end: +// CK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV14]], align 4 +// CK3-NEXT: br label [[OMP_INNER_FOR_COND18:%.*]] +// CK3: omp.inner.for.cond18: +// CK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV14]], align 4 +// CK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CK3-NEXT: [[CMP19:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CK3-NEXT: br i1 [[CMP19]], label [[OMP_INNER_FOR_BODY20:%.*]], label [[OMP_INNER_FOR_END32:%.*]] +// CK3: omp.inner.for.body20: +// CK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV14]], align 4 +// CK3-NEXT: [[MUL21:%.*]] = mul nsw i32 [[TMP18]], 1 +// CK3-NEXT: [[ADD22:%.*]] = add nsw i32 0, [[MUL21]] +// CK3-NEXT: store i32 [[ADD22]], i32* [[I16]], align 4 +// CK3-NEXT: [[ARRAYDECAY23:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[V1]], i64 0, i64 0 +// CK3-NEXT: [[ARRAYDECAY24:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[V2]], i64 0, i64 0 +// CK3-NEXT: [[ARRAYDECAY25:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[V3]], i64 0, i64 0 +// CK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I16]], align 4 +// CK3-NEXT: [[CALL26:%.*]] = call i32 @t_for(i32* noundef [[ARRAYDECAY23]], i32* noundef [[ARRAYDECAY24]], i32* noundef [[ARRAYDECAY25]], i32 noundef [[TMP19]]) +// CK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[I16]], align 4 +// CK3-NEXT: [[IDXPROM27:%.*]] = sext i32 [[TMP20]] to i64 +// CK3-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[V3]], i64 0, i64 [[IDXPROM27]] +// CK3-NEXT: store i32 [[CALL26]], i32* [[ARRAYIDX28]], align 4 +// CK3-NEXT: br label [[OMP_BODY_CONTINUE29:%.*]] +// CK3: omp.body.continue29: +// CK3-NEXT: br label [[OMP_INNER_FOR_INC30:%.*]] +// CK3: omp.inner.for.inc30: +// CK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV14]], align 4 +// CK3-NEXT: [[ADD31:%.*]] = add nsw i32 [[TMP21]], 1 +// CK3-NEXT: store i32 [[ADD31]], i32* [[DOTOMP_IV14]], align 4 +// CK3-NEXT: br label [[OMP_INNER_FOR_COND18]] +// CK3: omp.inner.for.end32: +// CK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CK3: omp.loop.exit: +// CK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +// CK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP0]]) +// CK3-NEXT: ret void +// +// +// +// +// +// +// +// CK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_test_l309 +// CK4-SAME: ([100 x i32]* noundef nonnull align 4 dereferenceable(400) [[V1:%.*]], [100 x i32]* noundef nonnull align 4 dereferenceable(400) [[V2:%.*]], [100 x i32]* noundef nonnull align 4 dereferenceable(400) [[V3:%.*]]) #[[ATTR1:[0-9]+]] { +// CK4-NEXT: entry: +// CK4-NEXT: [[V1_ADDR:%.*]] = alloca [100 x i32]*, align 8 +// CK4-NEXT: [[V2_ADDR:%.*]] = alloca [100 x i32]*, align 8 +// CK4-NEXT: [[V3_ADDR:%.*]] = alloca [100 x i32]*, align 8 +// CK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CK4-NEXT: store [100 x i32]* [[V1]], [100 x i32]** [[V1_ADDR]], align 8 +// CK4-NEXT: store [100 x i32]* [[V2]], [100 x i32]** [[V2_ADDR]], align 8 +// CK4-NEXT: store [100 x i32]* [[V3]], [100 x i32]** [[V3_ADDR]], align 8 +// CK4-NEXT: [[TMP0:%.*]] = load [100 x i32]*, [100 x i32]** [[V1_ADDR]], align 8 +// CK4-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[V2_ADDR]], align 8 +// CK4-NEXT: [[TMP2:%.*]] = load [100 x i32]*, [100 x i32]** [[V3_ADDR]], align 8 +// CK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CK4-NEXT: store [100 x i32]* [[TMP0]], [100 x i32]** [[TMP3]], align 8 +// CK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CK4-NEXT: store [100 x i32]* [[TMP1]], [100 x i32]** [[TMP4]], align 8 +// CK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CK4-NEXT: store [100 x i32]* [[TMP2]], [100 x i32]** [[TMP5]], align 8 +// CK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) +// CK4-NEXT: ret void +// +// +// CK4-LABEL: define {{[^@]+}}@.omp_outlined. +// CK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { +// CK4-NEXT: entry: +// CK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CK4-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CK4-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CK4-NEXT: [[TMP2:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP1]], align 8 +// CK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CK4-NEXT: [[TMP4:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP3]], align 8 +// CK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CK4-NEXT: [[TMP6:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP5]], align 8 +// CK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CK4-NEXT: store [100 x i32]* [[TMP2]], [100 x i32]** [[TMP7]], align 8 +// CK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CK4-NEXT: store [100 x i32]* [[TMP4]], [100 x i32]** [[TMP8]], align 8 +// CK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CK4-NEXT: store [100 x i32]* [[TMP6]], [100 x i32]** [[TMP9]], align 8 +// CK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) +// CK4-NEXT: ret void +// +// +// CK4-LABEL: define {{[^@]+}}@.omp_outlined..1 +// CK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { +// CK4-NEXT: entry: +// CK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CK4-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CK4-NEXT: [[I:%.*]] = alloca i32, align 4 +// CK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CK4-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CK4-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CK4-NEXT: [[TMP2:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP1]], align 8 +// CK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CK4-NEXT: [[TMP4:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP3]], align 8 +// CK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CK4-NEXT: [[TMP6:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP5]], align 8 +// CK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CK4-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 +// CK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CK4-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 +// CK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CK4: cond.true: +// CK4-NEXT: br label [[COND_END:%.*]] +// CK4: cond.false: +// CK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CK4-NEXT: br label [[COND_END]] +// CK4: cond.end: +// CK4-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CK4-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CK4: omp.inner.for.cond: +// CK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CK4: omp.inner.for.body: +// CK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CK4-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP2]], i64 0, i64 0 +// CK4-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP4]], i64 0, i64 0 +// CK4-NEXT: [[ARRAYDECAY3:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP6]], i64 0, i64 0 +// CK4-NEXT: call void @combined_vxv(i32* noundef [[ARRAYDECAY]], i32* noundef [[ARRAYDECAY2]], i32* noundef [[ARRAYDECAY3]], i32 noundef 100) +// CK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CK4: omp.body.continue: +// CK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CK4: omp.inner.for.inc: +// CK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CK4-NEXT: br label [[OMP_INNER_FOR_COND]] +// CK4: omp.inner.for.end: +// CK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CK4: omp.loop.exit: +// CK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CK4-NEXT: ret void +// +// +// CK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_test_l322 +// CK4-SAME: ([100 x i32]* noundef nonnull align 4 dereferenceable(400) [[V1:%.*]], [100 x i32]* noundef nonnull align 4 dereferenceable(400) [[V2:%.*]], [100 x i32]* noundef nonnull align 4 dereferenceable(400) [[V3:%.*]]) #[[ATTR1]] { +// CK4-NEXT: entry: +// CK4-NEXT: [[V1_ADDR:%.*]] = alloca [100 x i32]*, align 8 +// CK4-NEXT: [[V2_ADDR:%.*]] = alloca [100 x i32]*, align 8 +// CK4-NEXT: [[V3_ADDR:%.*]] = alloca [100 x i32]*, align 8 +// CK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CK4-NEXT: store [100 x i32]* [[V1]], [100 x i32]** [[V1_ADDR]], align 8 +// CK4-NEXT: store [100 x i32]* [[V2]], [100 x i32]** [[V2_ADDR]], align 8 +// CK4-NEXT: store [100 x i32]* [[V3]], [100 x i32]** [[V3_ADDR]], align 8 +// CK4-NEXT: [[TMP0:%.*]] = load [100 x i32]*, [100 x i32]** [[V1_ADDR]], align 8 +// CK4-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[V2_ADDR]], align 8 +// CK4-NEXT: [[TMP2:%.*]] = load [100 x i32]*, [100 x i32]** [[V3_ADDR]], align 8 +// CK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CK4-NEXT: store [100 x i32]* [[TMP0]], [100 x i32]** [[TMP3]], align 8 +// CK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CK4-NEXT: store [100 x i32]* [[TMP1]], [100 x i32]** [[TMP4]], align 8 +// CK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CK4-NEXT: store [100 x i32]* [[TMP2]], [100 x i32]** [[TMP5]], align 8 +// CK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) +// CK4-NEXT: ret void +// +// +// +// +// +// +// +// +// +// +// +// CK4-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CK4-SAME: () #[[ATTR3:[0-9]+]] section ".text.startup" { +// CK4-NEXT: entry: +// CK4-NEXT: call void @__tgt_register_requires(i64 1) +// CK4-NEXT: ret void +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// diff --git a/clang/test/OpenMP/distribute_codegen.cpp b/clang/test/OpenMP/distribute_codegen.cpp --- a/clang/test/OpenMP/distribute_codegen.cpp +++ b/clang/test/OpenMP/distribute_codegen.cpp @@ -187,23 +187,29 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK1-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK1-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK1-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined. to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -213,79 +219,81 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK1-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP12]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load float*, float** [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP15]], i64 [[IDXPROM2]] -// CHECK1-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX3]], align 4 -// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK1-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM5]] -// CHECK1-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX6]], align 4 -// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK1-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM8]] +// CHECK1-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM2]] +// CHECK1-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK1-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM5]] +// CHECK1-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX6]], align 4 +// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK1-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP26]], i64 [[IDXPROM8]] // CHECK1-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -360,23 +368,29 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK1-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK1-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK1-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -386,79 +400,81 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK1-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK1-NEXT: store i32 [[SUB]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP12]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load float*, float** [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP15]], i64 [[IDXPROM2]] -// CHECK1-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX3]], align 4 -// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK1-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM5]] -// CHECK1-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX6]], align 4 -// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK1-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM8]] +// CHECK1-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM2]] +// CHECK1-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK1-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM5]] +// CHECK1-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX6]], align 4 +// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK1-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP26]], i64 [[IDXPROM8]] // CHECK1-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -533,23 +549,29 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK1-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK1-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK1-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..4 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -559,96 +581,98 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK1-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 16908288, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK1-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM3]] -// CHECK1-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK1-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM6]] -// CHECK1-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK1-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM9]] +// CHECK1-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP4]], align 8, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP6]], align 8, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[IDXPROM3]] +// CHECK1-NEXT: [[TMP24:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK1-NEXT: [[TMP25:%.*]] = load float*, float** [[TMP8]], align 8, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP25]], i64 [[IDXPROM6]] +// CHECK1-NEXT: [[TMP27:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP27]] +// CHECK1-NEXT: [[TMP28:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP29]] to i64 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP28]], i64 [[IDXPROM9]] // CHECK1-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP30]], 1 // CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -706,18 +730,21 @@ // CHECK1-SAME: (i64 noundef [[A:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i8* [[CONV]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i8* [[CONV]], i8** [[TMP0]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 @@ -730,59 +757,61 @@ // CHECK1-NEXT: [[I5:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i8* [[A]], i8** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i8*, i8** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8, i8* [[TMP0]], align 1 -// CHECK1-NEXT: store i8 [[TMP1]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i8, i8* [[TMP2]], align 1 +// CHECK1-NEXT: store i8 [[TMP3]], i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP4]] to i32 // CHECK1-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] // CHECK1-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: store i8 [[TMP3]], i8* [[I]], align 1 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[CONV4:%.*]] = sext i8 [[TMP4]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: store i8 [[TMP5]], i8* [[I]], align 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[CONV4:%.*]] = sext i8 [[TMP6]] to i32 // CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 10 // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[CONV8:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[CONV8:%.*]] = sext i8 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]] // CHECK1-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8 // CHECK1-NEXT: store i8 [[CONV10]], i8* [[I5]], align 1 @@ -790,16 +819,16 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -851,18 +880,21 @@ // CHECK1-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i16* [[CONV]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i16* [[CONV]], i16** [[TMP0]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -872,70 +904,72 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16*, i16** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[TMP0]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP2]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) // CHECK1-NEXT: ret void // // @@ -1017,23 +1051,29 @@ // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca float*, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca float*, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store float* [[A]], float** [[A_ADDR]], align 4 // CHECK3-NEXT: store float* [[B]], float** [[B_ADDR]], align 4 // CHECK3-NEXT: store float* [[C]], float** [[C_ADDR]], align 4 // CHECK3-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined. to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[C:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca float**, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca float**, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca float**, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca float**, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1043,75 +1083,77 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store float** [[A]], float*** [[A_ADDR]], align 4 -// CHECK3-NEXT: store float** [[B]], float*** [[B_ADDR]], align 4 -// CHECK3-NEXT: store float** [[C]], float*** [[C_ADDR]], align 4 -// CHECK3-NEXT: store float** [[D]], float*** [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load float*, float** [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP15]], i32 [[TMP16]] -// CHECK3-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX2]], align 4 -// CHECK3-NEXT: [[MUL3:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK3-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP18]], i32 [[TMP19]] -// CHECK3-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX4]], align 4 -// CHECK3-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP20]] -// CHECK3-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP21]], i32 [[TMP22]] +// CHECK3-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP17]], i32 [[TMP18]] +// CHECK3-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 [[TMP21]] +// CHECK3-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX2]], align 4 +// CHECK3-NEXT: [[MUL3:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK3-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP23]], i32 [[TMP24]] +// CHECK3-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX4]], align 4 +// CHECK3-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP25]] +// CHECK3-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP26]], i32 [[TMP27]] // CHECK3-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1186,23 +1228,29 @@ // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca float*, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca float*, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store float* [[A]], float** [[A_ADDR]], align 4 // CHECK3-NEXT: store float* [[B]], float** [[B_ADDR]], align 4 // CHECK3-NEXT: store float* [[C]], float** [[C_ADDR]], align 4 // CHECK3-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[C:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca float**, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca float**, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca float**, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca float**, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1212,75 +1260,77 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store float** [[A]], float*** [[A_ADDR]], align 4 -// CHECK3-NEXT: store float** [[B]], float*** [[B_ADDR]], align 4 -// CHECK3-NEXT: store float** [[C]], float*** [[C_ADDR]], align 4 -// CHECK3-NEXT: store float** [[D]], float*** [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK3-NEXT: store i32 [[SUB]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load float*, float** [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP15]], i32 [[TMP16]] -// CHECK3-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX2]], align 4 -// CHECK3-NEXT: [[MUL3:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK3-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP18]], i32 [[TMP19]] -// CHECK3-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX4]], align 4 -// CHECK3-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP20]] -// CHECK3-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP21]], i32 [[TMP22]] +// CHECK3-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP17]], i32 [[TMP18]] +// CHECK3-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 [[TMP21]] +// CHECK3-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX2]], align 4 +// CHECK3-NEXT: [[MUL3:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK3-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP23]], i32 [[TMP24]] +// CHECK3-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX4]], align 4 +// CHECK3-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP25]] +// CHECK3-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP26]], i32 [[TMP27]] // CHECK3-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1355,23 +1405,29 @@ // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca float*, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca float*, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store float* [[A]], float** [[A_ADDR]], align 4 // CHECK3-NEXT: store float* [[B]], float** [[B_ADDR]], align 4 // CHECK3-NEXT: store float* [[C]], float** [[C_ADDR]], align 4 // CHECK3-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..4 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[C:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca float**, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca float**, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca float**, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca float**, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1381,92 +1437,94 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store float** [[A]], float*** [[A_ADDR]], align 4 -// CHECK3-NEXT: store float** [[B]], float*** [[B_ADDR]], align 4 -// CHECK3-NEXT: store float** [[C]], float*** [[C_ADDR]], align 4 -// CHECK3-NEXT: store float** [[D]], float*** [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 16908288, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK3-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i32 [[TMP15]] -// CHECK3-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP17]], i32 [[TMP18]] -// CHECK3-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK3-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] -// CHECK3-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP23]], i32 [[TMP24]] +// CHECK3-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP4]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP19]], i32 [[TMP20]] +// CHECK3-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP6]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP22]], i32 [[TMP23]] +// CHECK3-NEXT: [[TMP24:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[MUL4:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK3-NEXT: [[TMP25:%.*]] = load float*, float** [[TMP8]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP25]], i32 [[TMP26]] +// CHECK3-NEXT: [[TMP27:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP27]] +// CHECK3-NEXT: [[TMP28:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP28]], i32 [[TMP29]] // CHECK3-NEXT: store float [[MUL6]], float* [[ARRAYIDX7]], align 4, !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[ADD8:%.*]] = add i32 [[TMP25]], 1 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[ADD8:%.*]] = add i32 [[TMP30]], 1 // CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD10:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD10:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1524,18 +1582,21 @@ // CHECK3-SAME: (i32 noundef [[A:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[A_ADDR]] to i8* -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i8* [[CONV]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i8* [[CONV]], i8** [[TMP0]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 @@ -1548,59 +1609,61 @@ // CHECK3-NEXT: [[I5:%.*]] = alloca i8, align 1 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i8* [[A]], i8** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i8*, i8** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i8, i8* [[TMP0]], align 1 -// CHECK3-NEXT: store i8 [[TMP1]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8*, i8** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i8, i8* [[TMP2]], align 1 +// CHECK3-NEXT: store i8 [[TMP3]], i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[CONV:%.*]] = sext i8 [[TMP4]] to i32 // CHECK3-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] // CHECK3-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK3-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: store i8 [[TMP3]], i8* [[I]], align 1 -// CHECK3-NEXT: [[TMP4:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[CONV4:%.*]] = sext i8 [[TMP4]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: store i8 [[TMP5]], i8* [[I]], align 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[CONV4:%.*]] = sext i8 [[TMP6]] to i32 // CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 10 // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[CONV8:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[CONV8:%.*]] = sext i8 [[TMP17]] to i32 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]] // CHECK3-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8 // CHECK3-NEXT: store i8 [[CONV10]], i8* [[I5]], align 1 @@ -1608,16 +1671,16 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK3-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -1669,18 +1732,21 @@ // CHECK3-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i16* [[CONV]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i16* [[CONV]], i16** [[TMP0]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1690,70 +1756,72 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16*, i16** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[TMP0]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP2]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) // CHECK3-NEXT: ret void // // @@ -1771,23 +1839,29 @@ // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK17-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK17-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK17-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK17-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK17-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined. to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK17-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1797,79 +1871,81 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK17-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK17-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK17-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 8 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP12]], i64 [[IDXPROM]] -// CHECK17-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load float*, float** [[TMP2]], align 8 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP15]], i64 [[IDXPROM2]] -// CHECK17-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX3]], align 4 -// CHECK17-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK17-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP3]], align 8 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK17-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM5]] -// CHECK17-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX6]], align 4 -// CHECK17-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK17-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 8 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK17-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM8]] +// CHECK17-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP6]], align 8 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK17-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM2]] +// CHECK17-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK17-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK17-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK17-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM5]] +// CHECK17-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX6]], align 4 +// CHECK17-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK17-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK17-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP26]], i64 [[IDXPROM8]] // CHECK17-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK17-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK17-NEXT: ret void // // @@ -1880,23 +1956,29 @@ // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK17-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK17-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK17-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK17-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK17-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK17-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1906,79 +1988,81 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK17-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK17-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK17-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK17-NEXT: store i32 [[SUB]], i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 8 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP12]], i64 [[IDXPROM]] -// CHECK17-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load float*, float** [[TMP2]], align 8 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP15]], i64 [[IDXPROM2]] -// CHECK17-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX3]], align 4 -// CHECK17-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK17-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP3]], align 8 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK17-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM5]] -// CHECK17-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX6]], align 4 -// CHECK17-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK17-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 8 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK17-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM8]] +// CHECK17-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP6]], align 8 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK17-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM2]] +// CHECK17-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK17-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK17-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK17-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM5]] +// CHECK17-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX6]], align 4 +// CHECK17-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK17-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK17-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP26]], i64 [[IDXPROM8]] // CHECK17-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK17-NEXT: ret void // // @@ -1989,23 +2073,29 @@ // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK17-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK17-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK17-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK17-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK17-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK17-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..2 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK17-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2015,96 +2105,98 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK17-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK17-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK17-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 16908288, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK17-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK17-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK17-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK17-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK17-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK17-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !11 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK17-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM]] -// CHECK17-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !11 -// CHECK17-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !11 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK17-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM3]] -// CHECK17-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !11 -// CHECK17-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK17-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !11 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK17-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK17-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM6]] -// CHECK17-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !11 -// CHECK17-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK17-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !11 -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK17-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK17-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM9]] +// CHECK17-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP4]], align 8, !llvm.access.group !11 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK17-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK17-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP6]], align 8, !llvm.access.group !11 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK17-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK17-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[IDXPROM3]] +// CHECK17-NEXT: [[TMP24:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !11 +// CHECK17-NEXT: [[MUL5:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK17-NEXT: [[TMP25:%.*]] = load float*, float** [[TMP8]], align 8, !llvm.access.group !11 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK17-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK17-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP25]], i64 [[IDXPROM6]] +// CHECK17-NEXT: [[TMP27:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !11 +// CHECK17-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP27]] +// CHECK17-NEXT: [[TMP28:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !11 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK17-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP29]] to i64 +// CHECK17-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP28]], i64 [[IDXPROM9]] // CHECK17-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !11 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK17-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK17-NEXT: [[ADD11:%.*]] = add i32 [[TMP30]], 1 // CHECK17-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: -// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD12:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK17-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD13:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD13:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK17-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK17-NEXT: ret void // // @@ -2112,18 +2204,21 @@ // CHECK17-SAME: (i64 noundef [[A:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i8* [[CONV]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i8* [[CONV]], i8** [[TMP0]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i8*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 @@ -2136,59 +2231,61 @@ // CHECK17-NEXT: [[I5:%.*]] = alloca i8, align 1 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i8* [[A]], i8** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i8*, i8** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i8, i8* [[TMP0]], align 1 -// CHECK17-NEXT: store i8 [[TMP1]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK17-NEXT: [[TMP2:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK17-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32 +// CHECK17-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i8*, i8** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i8, i8* [[TMP2]], align 1 +// CHECK17-NEXT: store i8 [[TMP3]], i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK17-NEXT: [[CONV:%.*]] = sext i8 [[TMP4]] to i32 // CHECK17-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] // CHECK17-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK17-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK17-NEXT: store i8 [[TMP3]], i8* [[I]], align 1 -// CHECK17-NEXT: [[TMP4:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK17-NEXT: [[CONV4:%.*]] = sext i8 [[TMP4]] to i32 +// CHECK17-NEXT: [[TMP5:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK17-NEXT: store i8 [[TMP5]], i8* [[I]], align 1 +// CHECK17-NEXT: [[TMP6:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK17-NEXT: [[CONV4:%.*]] = sext i8 [[TMP6]] to i32 // CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 10 // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK17-NEXT: [[CONV8:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK17-NEXT: [[TMP17:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK17-NEXT: [[CONV8:%.*]] = sext i8 [[TMP17]] to i32 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]] // CHECK17-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8 // CHECK17-NEXT: store i8 [[CONV10]], i8* [[I5]], align 1 @@ -2196,16 +2293,16 @@ // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK17-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) +// CHECK17-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void @@ -2215,18 +2312,21 @@ // CHECK17-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i16*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i16* [[CONV]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i16* [[CONV]], i16** [[TMP0]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2236,70 +2336,72 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i16*, i16** [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i16, i16* [[TMP0]], align 2 -// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP2]], align 2 +// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK17-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK17-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK17-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK17-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) // CHECK17-NEXT: ret void // // @@ -2310,23 +2412,29 @@ // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca float*, align 4 // CHECK19-NEXT: [[C_ADDR:%.*]] = alloca float*, align 4 // CHECK19-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: store float* [[A]], float** [[A_ADDR]], align 4 // CHECK19-NEXT: store float* [[B]], float** [[B_ADDR]], align 4 // CHECK19-NEXT: store float* [[C]], float** [[C_ADDR]], align 4 // CHECK19-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined. to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[C:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca float**, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca float**, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca float**, align 4 -// CHECK19-NEXT: [[D_ADDR:%.*]] = alloca float**, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2336,75 +2444,77 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store float** [[A]], float*** [[A_ADDR]], align 4 -// CHECK19-NEXT: store float** [[B]], float*** [[B_ADDR]], align 4 -// CHECK19-NEXT: store float** [[C]], float*** [[C_ADDR]], align 4 -// CHECK19-NEXT: store float** [[D]], float*** [[D_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 [[TMP13]] -// CHECK19-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load float*, float** [[TMP2]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP15]], i32 [[TMP16]] -// CHECK19-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX2]], align 4 -// CHECK19-NEXT: [[MUL3:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK19-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP3]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP18]], i32 [[TMP19]] -// CHECK19-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX4]], align 4 -// CHECK19-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP20]] -// CHECK19-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP21]], i32 [[TMP22]] +// CHECK19-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP17]], i32 [[TMP18]] +// CHECK19-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP6]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 [[TMP21]] +// CHECK19-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX2]], align 4 +// CHECK19-NEXT: [[MUL3:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK19-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP23]], i32 [[TMP24]] +// CHECK19-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX4]], align 4 +// CHECK19-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP25]] +// CHECK19-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP26]], i32 [[TMP27]] // CHECK19-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK19-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK19-NEXT: ret void // // @@ -2415,23 +2525,29 @@ // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca float*, align 4 // CHECK19-NEXT: [[C_ADDR:%.*]] = alloca float*, align 4 // CHECK19-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: store float* [[A]], float** [[A_ADDR]], align 4 // CHECK19-NEXT: store float* [[B]], float** [[B_ADDR]], align 4 // CHECK19-NEXT: store float* [[C]], float** [[C_ADDR]], align 4 // CHECK19-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[C:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca float**, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca float**, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca float**, align 4 -// CHECK19-NEXT: [[D_ADDR:%.*]] = alloca float**, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2441,75 +2557,77 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store float** [[A]], float*** [[A_ADDR]], align 4 -// CHECK19-NEXT: store float** [[B]], float*** [[B_ADDR]], align 4 -// CHECK19-NEXT: store float** [[C]], float*** [[C_ADDR]], align 4 -// CHECK19-NEXT: store float** [[D]], float*** [[D_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK19-NEXT: store i32 [[SUB]], i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 [[TMP13]] -// CHECK19-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load float*, float** [[TMP2]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP15]], i32 [[TMP16]] -// CHECK19-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX2]], align 4 -// CHECK19-NEXT: [[MUL3:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK19-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP3]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP18]], i32 [[TMP19]] -// CHECK19-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX4]], align 4 -// CHECK19-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP20]] -// CHECK19-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP21]], i32 [[TMP22]] +// CHECK19-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP17]], i32 [[TMP18]] +// CHECK19-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP6]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 [[TMP21]] +// CHECK19-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX2]], align 4 +// CHECK19-NEXT: [[MUL3:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK19-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP23]], i32 [[TMP24]] +// CHECK19-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX4]], align 4 +// CHECK19-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP25]] +// CHECK19-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP26]], i32 [[TMP27]] // CHECK19-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK19-NEXT: ret void // // @@ -2520,23 +2638,29 @@ // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca float*, align 4 // CHECK19-NEXT: [[C_ADDR:%.*]] = alloca float*, align 4 // CHECK19-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK19-NEXT: store float* [[A]], float** [[A_ADDR]], align 4 // CHECK19-NEXT: store float* [[B]], float** [[B_ADDR]], align 4 // CHECK19-NEXT: store float* [[C]], float** [[C_ADDR]], align 4 // CHECK19-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..2 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[C:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca float**, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca float**, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca float**, align 4 -// CHECK19-NEXT: [[D_ADDR:%.*]] = alloca float**, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2546,92 +2670,94 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store float** [[A]], float*** [[A_ADDR]], align 4 -// CHECK19-NEXT: store float** [[B]], float*** [[B_ADDR]], align 4 -// CHECK19-NEXT: store float** [[C]], float*** [[C_ADDR]], align 4 -// CHECK19-NEXT: store float** [[D]], float*** [[D_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 16908288, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK19-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK19-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i32 [[TMP15]] -// CHECK19-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP17]], i32 [[TMP18]] -// CHECK19-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK19-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 [[TMP21]] -// CHECK19-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] -// CHECK19-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP23]], i32 [[TMP24]] +// CHECK19-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP4]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP19]], i32 [[TMP20]] +// CHECK19-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP6]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP22]], i32 [[TMP23]] +// CHECK19-NEXT: [[TMP24:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[MUL4:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK19-NEXT: [[TMP25:%.*]] = load float*, float** [[TMP8]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP25]], i32 [[TMP26]] +// CHECK19-NEXT: [[TMP27:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP27]] +// CHECK19-NEXT: [[TMP28:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP28]], i32 [[TMP29]] // CHECK19-NEXT: store float [[MUL6]], float* [[ARRAYIDX7]], align 4, !llvm.access.group !12 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[ADD8:%.*]] = add i32 [[TMP25]], 1 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[ADD8:%.*]] = add i32 [[TMP30]], 1 // CHECK19-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: -// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD9:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD9:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK19-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD10:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD10:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK19-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK19-NEXT: ret void // // @@ -2639,18 +2765,21 @@ // CHECK19-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[A_ADDR]] to i8* -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i8* [[CONV]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i8* [[CONV]], i8** [[TMP0]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR0]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i8*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 @@ -2663,59 +2792,61 @@ // CHECK19-NEXT: [[I5:%.*]] = alloca i8, align 1 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i8* [[A]], i8** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i8*, i8** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i8, i8* [[TMP0]], align 1 -// CHECK19-NEXT: store i8 [[TMP1]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK19-NEXT: [[TMP2:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK19-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32 +// CHECK19-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i8*, i8** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i8, i8* [[TMP2]], align 1 +// CHECK19-NEXT: store i8 [[TMP3]], i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK19-NEXT: [[CONV:%.*]] = sext i8 [[TMP4]] to i32 // CHECK19-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] // CHECK19-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK19-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK19-NEXT: store i8 [[TMP3]], i8* [[I]], align 1 -// CHECK19-NEXT: [[TMP4:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK19-NEXT: [[CONV4:%.*]] = sext i8 [[TMP4]] to i32 +// CHECK19-NEXT: [[TMP5:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK19-NEXT: store i8 [[TMP5]], i8* [[I]], align 1 +// CHECK19-NEXT: [[TMP6:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK19-NEXT: [[CONV4:%.*]] = sext i8 [[TMP6]] to i32 // CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 10 // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK19-NEXT: [[CONV8:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK19-NEXT: [[TMP17:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK19-NEXT: [[CONV8:%.*]] = sext i8 [[TMP17]] to i32 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]] // CHECK19-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8 // CHECK19-NEXT: store i8 [[CONV10]], i8* [[I5]], align 1 @@ -2723,16 +2854,16 @@ // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK19-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) +// CHECK19-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void @@ -2742,18 +2873,21 @@ // CHECK19-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i16*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i16* [[CONV]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i16* [[CONV]], i16** [[TMP0]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2763,69 +2897,71 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16*, i16** [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i16, i16* [[TMP0]], align 2 -// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP2]], align 2 +// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 -// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK19-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK19-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK19-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK19-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) // CHECK19-NEXT: ret void // diff --git a/clang/test/OpenMP/distribute_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_firstprivate_codegen.cpp --- a/clang/test/OpenMP/distribute_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_firstprivate_codegen.cpp @@ -176,6 +176,7 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], i64* [[SVAR_ADDR]], align 8 @@ -185,20 +186,25 @@ // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SFVAR_ADDR]] to float* // CHECK1-NEXT: store double* [[CONV1]], double** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, double*, double*, i32*, float*)* @.omp_outlined. to void (i32*, i32*, ...)*), double* [[CONV]], double* [[TMP0]], i32* [[CONV2]], float* [[CONV3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store double* [[CONV]], double** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[TMP2]], double** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[CONV2]], i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[CONV3]], float** [[TMP4]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[G:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -207,104 +213,106 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store double* [[G]], double** [[G_ADDR]], align 8 -// CHECK1-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store float* [[SFVAR]], float** [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float*, float** [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store double* [[TMP1]], double** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: store double* [[TMP4]], double** [[_TMP1]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 8 +// CHECK1-NEXT: store double* [[TMP4]], double** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[TMP9]], double** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load volatile double, double* [[TMP0]], align 8 -// CHECK1-NEXT: store double [[TMP5]], double* [[G3]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load volatile double, double* [[TMP6]], align 8 -// CHECK1-NEXT: store double [[TMP7]], double* [[G14]], align 8 -// CHECK1-NEXT: store double* [[G14]], double** [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[SVAR6]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load float, float* [[TMP3]], align 4 -// CHECK1-NEXT: store float [[TMP9]], float* [[SFVAR7]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load volatile double, double* [[TMP2]], align 8 +// CHECK1-NEXT: store double [[TMP10]], double* [[G]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load double*, double** [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load volatile double, double* [[TMP11]], align 8 +// CHECK1-NEXT: store double [[TMP12]], double* [[G1]], align 8 +// CHECK1-NEXT: store double* [[G1]], double** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load float, float* [[TMP8]], align 4 +// CHECK1-NEXT: store float [[TMP14]], float* [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load double, double* [[G3]], align 8 -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP18]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD9]], double* [[G3]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load double*, double** [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load volatile double, double* [[TMP19]], align 8 -// CHECK1-NEXT: [[ADD10:%.*]] = fadd double [[TMP20]], 1.000000e+00 -// CHECK1-NEXT: store volatile double [[ADD10]], double* [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[SVAR6]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 3 -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[SVAR6]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load float, float* [[SFVAR7]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP22]] to double -// CHECK1-NEXT: [[ADD12:%.*]] = fadd double [[CONV]], 4.000000e+00 -// CHECK1-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK1-NEXT: store float [[CONV13]], float* [[SFVAR7]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double* [[G3]], double** [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP25:%.*]] = load double*, double** [[_TMP5]], align 8 -// CHECK1-NEXT: store double* [[TMP25]], double** [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store i32* [[SVAR6]], i32** [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store float* [[SFVAR7]], float** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load double, double* [[G]], align 8 +// CHECK1-NEXT: [[ADD5:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD5]], double* [[G]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load double*, double** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load volatile double, double* [[TMP24]], align 8 +// CHECK1-NEXT: [[ADD6:%.*]] = fadd double [[TMP25]], 1.000000e+00 +// CHECK1-NEXT: store volatile double [[ADD6]], double* [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], 3 +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load float, float* [[SFVAR]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP27]] to double +// CHECK1-NEXT: [[ADD8:%.*]] = fadd double [[CONV]], 4.000000e+00 +// CHECK1-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float +// CHECK1-NEXT: store float [[CONV9]], float* [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store double* [[G]], double** [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load double*, double** [[_TMP3]], align 8 +// CHECK1-NEXT: store double* [[TMP30]], double** [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[SVAR]], i32** [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[SFVAR]], float** [[TMP32]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]]) // CHECK1-NEXT: ret void // // @@ -344,6 +352,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 // CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], i32* [[SVAR_ADDR]], align 4 @@ -358,20 +367,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load volatile double, double* [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], double* [[G13]], align 8 // CHECK3-NEXT: store double* [[G13]], double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, double*, double*, i32*, float*)* @.omp_outlined. to void (i32*, i32*, ...)*), double* [[G2]], double* [[TMP5]], i32* [[SVAR_ADDR]], float* [[CONV]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G2]], double** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load double*, double** [[_TMP4]], align 4 +// CHECK3-NEXT: store double* [[TMP7]], double** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SVAR_ADDR]], i32** [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[CONV]], float** [[TMP9]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca float*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -380,104 +394,106 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP5:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP3:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 -// CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store float* [[SFVAR]], float** [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load float*, float** [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store double* [[TMP1]], double** [[TMP]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP]], align 4 -// CHECK3-NEXT: store double* [[TMP4]], double** [[_TMP1]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 4 +// CHECK3-NEXT: store double* [[TMP4]], double** [[TMP]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load double*, double** [[TMP]], align 4 +// CHECK3-NEXT: store double* [[TMP9]], double** [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load volatile double, double* [[TMP0]], align 8 -// CHECK3-NEXT: store double [[TMP5]], double* [[G3]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = load double*, double** [[_TMP1]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load volatile double, double* [[TMP6]], align 4 -// CHECK3-NEXT: store double [[TMP7]], double* [[G14]], align 8 -// CHECK3-NEXT: store double* [[G14]], double** [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[SVAR6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load float, float* [[TMP3]], align 4 -// CHECK3-NEXT: store float [[TMP9]], float* [[SFVAR7]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load volatile double, double* [[TMP2]], align 8 +// CHECK3-NEXT: store double [[TMP10]], double* [[G]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load double*, double** [[_TMP1]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load volatile double, double* [[TMP11]], align 4 +// CHECK3-NEXT: store double [[TMP12]], double* [[G1]], align 8 +// CHECK3-NEXT: store double* [[G1]], double** [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load float, float* [[TMP8]], align 4 +// CHECK3-NEXT: store float [[TMP14]], float* [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load double, double* [[G3]], align 8 -// CHECK3-NEXT: [[ADD9:%.*]] = fadd double [[TMP18]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD9]], double* [[G3]], align 8 -// CHECK3-NEXT: [[TMP19:%.*]] = load double*, double** [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load volatile double, double* [[TMP19]], align 4 -// CHECK3-NEXT: [[ADD10:%.*]] = fadd double [[TMP20]], 1.000000e+00 -// CHECK3-NEXT: store volatile double [[ADD10]], double* [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[SVAR6]], align 4 -// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 3 -// CHECK3-NEXT: store i32 [[ADD11]], i32* [[SVAR6]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load float, float* [[SFVAR7]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP22]] to double -// CHECK3-NEXT: [[ADD12:%.*]] = fadd double [[CONV]], 4.000000e+00 -// CHECK3-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK3-NEXT: store float [[CONV13]], float* [[SFVAR7]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double* [[G3]], double** [[TMP23]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP25:%.*]] = load double*, double** [[_TMP5]], align 4 -// CHECK3-NEXT: store double* [[TMP25]], double** [[TMP24]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store i32* [[SVAR6]], i32** [[TMP26]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store float* [[SFVAR7]], float** [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load double, double* [[G]], align 8 +// CHECK3-NEXT: [[ADD5:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD5]], double* [[G]], align 8 +// CHECK3-NEXT: [[TMP24:%.*]] = load double*, double** [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load volatile double, double* [[TMP24]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = fadd double [[TMP25]], 1.000000e+00 +// CHECK3-NEXT: store volatile double [[ADD6]], double* [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], 3 +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load float, float* [[SFVAR]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP27]] to double +// CHECK3-NEXT: [[ADD8:%.*]] = fadd double [[CONV]], 4.000000e+00 +// CHECK3-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float +// CHECK3-NEXT: store float [[CONV9]], float* [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G]], double** [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP30:%.*]] = load double*, double** [[_TMP3]], align 4 +// CHECK3-NEXT: store double* [[TMP30]], double** [[TMP29]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SVAR]], i32** [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[SFVAR]], float** [[TMP32]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK3-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]]) // CHECK3-NEXT: ret void // // @@ -629,6 +645,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 @@ -640,21 +657,27 @@ // CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK9-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]], [2 x i32]* [[TMP0]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP3]], i32* [[CONV1]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S* [[TMP7]], %struct.S** [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP8]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -663,126 +686,128 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP8:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[SVAR9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP4:%.*]] = alloca %struct.S*, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: store %struct.S* [[TMP5]], %struct.S** [[_TMP1]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK9-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S* [[TMP11]], %struct.S** [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[T_VAR3]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK9-NEXT: [[TMP8:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP7]], i8* align 4 [[TMP8]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP9:%.*]] = bitcast [2 x %struct.S]* [[TMP2]] to %struct.S* -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP10]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: [[TMP14:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = bitcast [2 x %struct.S]* [[TMP6]] to %struct.S* +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP16]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP9]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP15]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[TMP11:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK9-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP17:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK9-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP17]], i8* align 4 [[TMP18]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP10]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done6: -// CHECK9-NEXT: [[TMP13:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[VAR7]] to i8* -// CHECK9-NEXT: [[TMP15:%.*]] = bitcast %struct.S* [[TMP13]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i64 4, i1 false) -// CHECK9-NEXT: store %struct.S* [[VAR7]], %struct.S** [[_TMP8]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[SVAR9]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP18]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP16]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done3: +// CHECK9-NEXT: [[TMP19:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK9-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[TMP19]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) +// CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP4]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], i32* [[SVAR]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP24]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP25]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP26]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] +// CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP26]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP25]], i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = load %struct.S*, %struct.S** [[_TMP8]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i64 0, i64 [[IDXPROM11]] -// CHECK9-NEXT: [[TMP29:%.*]] = bitcast %struct.S* [[ARRAYIDX12]] to i8* -// CHECK9-NEXT: [[TMP30:%.*]] = bitcast %struct.S* [[TMP27]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP29]], i8* align 4 [[TMP30]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP32]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP31]], i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load %struct.S*, %struct.S** [[_TMP4]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK9-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* +// CHECK9-NEXT: [[TMP36:%.*]] = bitcast %struct.S* [[TMP33]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK9-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], 1 +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN14]], i64 2 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN9]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP34]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP40]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done15: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done10: // CHECK9-NEXT: ret void // // @@ -952,6 +977,7 @@ // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 @@ -961,20 +987,25 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK9-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[CONV]], [2 x i32]* [[TMP0]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP3]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [2 x %struct.S.0]* [[TMP1]], [2 x %struct.S.0]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S.0* [[TMP7]], %struct.S.0** [[TMP6]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -983,121 +1014,123 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP8:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[_TMP4:%.*]] = alloca %struct.S.0*, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[_TMP1]], align 8 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP7]], align 8 +// CHECK9-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S.0* [[TMP9]], %struct.S.0** [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[T_VAR3]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK9-NEXT: [[TMP7:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP6]], i8* align 4 [[TMP7]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP8:%.*]] = bitcast [2 x %struct.S.0]* [[TMP2]] to %struct.S.0* -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP9]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: [[TMP12:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S.0]* [[TMP6]] to %struct.S.0* +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP8]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[TMP10:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK9-NEXT: [[TMP11:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP10]], i8* align 4 [[TMP11]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK9-NEXT: [[TMP16:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP15]], i8* align 4 [[TMP16]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done6: -// CHECK9-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP1]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[VAR7]] to i8* -// CHECK9-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 4, i1 false) -// CHECK9-NEXT: store %struct.S.0* [[VAR7]], %struct.S.0** [[_TMP8]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done3: +// CHECK9-NEXT: [[TMP17:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP1]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = bitcast %struct.S.0* [[VAR]] to i8* +// CHECK9-NEXT: [[TMP19:%.*]] = bitcast %struct.S.0* [[TMP17]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false) +// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP4]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP22]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP23]], i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 8 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP26]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK9-NEXT: [[TMP27:%.*]] = bitcast %struct.S.0* [[ARRAYIDX11]] to i8* -// CHECK9-NEXT: [[TMP28:%.*]] = bitcast %struct.S.0* [[TMP25]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP27]], i8* align 4 [[TMP28]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP28]], i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP4]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK9-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[ARRAYIDX7]] to i8* +// CHECK9-NEXT: [[TMP33:%.*]] = bitcast %struct.S.0* [[TMP30]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP29]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP34]], 1 +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP31]]) -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN13]], i64 2 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN9]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP32]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP37]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done14: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done10: // CHECK9-NEXT: ret void // // @@ -1291,6 +1324,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK11-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 @@ -1300,21 +1334,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 // CHECK11-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[T_VAR_ADDR]], [2 x i32]* [[TMP0]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP3]], i32* [[SVAR_ADDR]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[T_VAR_ADDR]], i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP7]], %struct.S** [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32* [[SVAR_ADDR]], i32** [[TMP8]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1323,124 +1363,126 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP8:%.*]] = alloca %struct.S*, align 4 -// CHECK11-NEXT: [[SVAR9:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP4:%.*]] = alloca %struct.S*, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: store %struct.S* [[TMP5]], %struct.S** [[_TMP1]], align 4 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP11]], %struct.S** [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[T_VAR3]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK11-NEXT: [[TMP8:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP7]], i8* align 4 [[TMP8]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP9:%.*]] = bitcast [2 x %struct.S]* [[TMP2]] to %struct.S* -// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP10]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK11-NEXT: [[TMP14:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP15:%.*]] = bitcast [2 x %struct.S]* [[TMP6]] to %struct.S* +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP16]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP9]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP15]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[TMP11:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK11-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP17:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK11-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP17]], i8* align 4 [[TMP18]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP10]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done6: -// CHECK11-NEXT: [[TMP13:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[VAR7]] to i8* -// CHECK11-NEXT: [[TMP15:%.*]] = bitcast %struct.S* [[TMP13]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i32 4, i1 false) -// CHECK11-NEXT: store %struct.S* [[VAR7]], %struct.S** [[_TMP8]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[SVAR9]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP18]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP16]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done3: +// CHECK11-NEXT: [[TMP19:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK11-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[TMP19]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false) +// CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP4]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[SVAR]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP24]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP25]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP26]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] +// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i32 0, i32 [[TMP26]] -// CHECK11-NEXT: store i32 [[TMP25]], i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load %struct.S*, %struct.S** [[_TMP8]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 [[TMP28]] -// CHECK11-NEXT: [[TMP29:%.*]] = bitcast %struct.S* [[ARRAYIDX11]] to i8* -// CHECK11-NEXT: [[TMP30:%.*]] = bitcast %struct.S* [[TMP27]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP29]], i8* align 4 [[TMP30]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP32]] +// CHECK11-NEXT: store i32 [[TMP31]], i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load %struct.S*, %struct.S** [[_TMP4]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[ARRAYIDX6]] to i8* +// CHECK11-NEXT: [[TMP36:%.*]] = bitcast %struct.S* [[TMP33]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK11-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], 1 +// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP34]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP40]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // @@ -1609,6 +1651,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 @@ -1617,20 +1660,25 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 // CHECK11-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[T_VAR_ADDR]], [2 x i32]* [[TMP0]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP3]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[T_VAR_ADDR]], i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x %struct.S.0]* [[TMP1]], [2 x %struct.S.0]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S.0* [[TMP7]], %struct.S.0** [[TMP6]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1639,119 +1687,121 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP8:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[_TMP4:%.*]] = alloca %struct.S.0*, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[_TMP1]], align 4 +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP7]], align 4 +// CHECK11-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S.0* [[TMP9]], %struct.S.0** [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[T_VAR3]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK11-NEXT: [[TMP7:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 [[TMP7]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP8:%.*]] = bitcast [2 x %struct.S.0]* [[TMP2]] to %struct.S.0* -// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP9]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK11-NEXT: [[TMP12:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S.0]* [[TMP6]] to %struct.S.0* +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP8]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[TMP10:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK11-NEXT: [[TMP11:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP10]], i8* align 4 [[TMP11]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK11-NEXT: [[TMP16:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP15]], i8* align 4 [[TMP16]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done6: -// CHECK11-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP1]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[VAR7]] to i8* -// CHECK11-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 4, i1 false) -// CHECK11-NEXT: store %struct.S.0* [[VAR7]], %struct.S.0** [[_TMP8]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done3: +// CHECK11-NEXT: [[TMP17:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP1]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = bitcast %struct.S.0* [[VAR]] to i8* +// CHECK11-NEXT: [[TMP19:%.*]] = bitcast %struct.S.0* [[TMP17]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false) +// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP4]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP22]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK11-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i32 0, i32 [[TMP24]] -// CHECK11-NEXT: store i32 [[TMP23]], i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = bitcast %struct.S.0* [[ARRAYIDX10]] to i8* -// CHECK11-NEXT: [[TMP28:%.*]] = bitcast %struct.S.0* [[TMP25]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP27]], i8* align 4 [[TMP28]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP29]] +// CHECK11-NEXT: store i32 [[TMP28]], i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP4]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP31]] +// CHECK11-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[ARRAYIDX6]] to i8* +// CHECK11-NEXT: [[TMP33:%.*]] = bitcast %struct.S.0* [[TMP30]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP29]], 1 -// CHECK11-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], 1 +// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP31]]) -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP32]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP37]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/distribute_lastprivate_codegen.cpp b/clang/test/OpenMP/distribute_lastprivate_codegen.cpp --- a/clang/test/OpenMP/distribute_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_lastprivate_codegen.cpp @@ -166,6 +166,7 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], i64* [[SVAR_ADDR]], align 8 @@ -175,20 +176,25 @@ // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SFVAR_ADDR]] to float* // CHECK1-NEXT: store double* [[CONV1]], double** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, double*, double*, i32*, float*)* @.omp_outlined. to void (i32*, i32*, ...)*), double* [[CONV]], double* [[TMP0]], i32* [[CONV2]], float* [[CONV3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store double* [[CONV]], double** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[TMP2]], double** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[CONV2]], i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[CONV3]], float** [[TMP4]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[G:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -197,99 +203,101 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store double* [[G]], double** [[G_ADDR]], align 8 -// CHECK1-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store float* [[SFVAR]], float** [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float*, float** [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store double* [[TMP1]], double** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: store double* [[TMP4]], double** [[_TMP1]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 8 +// CHECK1-NEXT: store double* [[TMP4]], double** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[TMP9]], double** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load double*, double** [[_TMP1]], align 8 -// CHECK1-NEXT: store double* [[G14]], double** [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load double*, double** [[_TMP1]], align 8 +// CHECK1-NEXT: store double* [[G1]], double** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: store double 1.000000e+00, double* [[G3]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load double*, double** [[_TMP5]], align 8 -// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP14]], align 8 -// CHECK1-NEXT: store i32 3, i32* [[SVAR6]], align 4 -// CHECK1-NEXT: store float 4.000000e+00, float* [[SFVAR7]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double* [[G3]], double** [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP17:%.*]] = load double*, double** [[_TMP5]], align 8 -// CHECK1-NEXT: store double* [[TMP17]], double** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store i32* [[SVAR6]], i32** [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store float* [[SFVAR7]], float** [[TMP19]], align 8 +// CHECK1-NEXT: store double 1.000000e+00, double* [[G]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load double*, double** [[_TMP3]], align 8 +// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP19]], align 8 +// CHECK1-NEXT: store i32 3, i32* [[SVAR]], align 4 +// CHECK1-NEXT: store float 4.000000e+00, float* [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store double* [[G]], double** [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load double*, double** [[_TMP3]], align 8 +// CHECK1-NEXT: store double* [[TMP22]], double** [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[SVAR]], i32** [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[SFVAR]], float** [[TMP24]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK1-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP23:%.*]] = load double, double* [[G3]], align 8 -// CHECK1-NEXT: store volatile double [[TMP23]], double* [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load double*, double** [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load double, double* [[TMP24]], align 8 -// CHECK1-NEXT: store volatile double [[TMP25]], double* [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[SVAR6]], align 4 -// CHECK1-NEXT: store i32 [[TMP26]], i32* [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load float, float* [[SFVAR7]], align 4 -// CHECK1-NEXT: store float [[TMP27]], float* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load double, double* [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP28]], double* [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load double*, double** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load double, double* [[TMP29]], align 8 +// CHECK1-NEXT: store volatile double [[TMP30]], double* [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP31]], i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load float, float* [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP32]], float* [[TMP8]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -331,6 +339,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 // CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], i32* [[SVAR_ADDR]], align 4 @@ -345,20 +354,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load volatile double, double* [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], double* [[G13]], align 8 // CHECK3-NEXT: store double* [[G13]], double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, double*, double*, i32*, float*)* @.omp_outlined. to void (i32*, i32*, ...)*), double* [[G2]], double* [[TMP5]], i32* [[SVAR_ADDR]], float* [[CONV]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G2]], double** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load double*, double** [[_TMP4]], align 4 +// CHECK3-NEXT: store double* [[TMP7]], double** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SVAR_ADDR]], i32** [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[CONV]], float** [[TMP9]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca float*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -367,99 +381,101 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP5:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP3:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 -// CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store float* [[SFVAR]], float** [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load float*, float** [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store double* [[TMP1]], double** [[TMP]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP]], align 4 -// CHECK3-NEXT: store double* [[TMP4]], double** [[_TMP1]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 4 +// CHECK3-NEXT: store double* [[TMP4]], double** [[TMP]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load double*, double** [[TMP]], align 4 +// CHECK3-NEXT: store double* [[TMP9]], double** [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load double*, double** [[_TMP1]], align 4 -// CHECK3-NEXT: store double* [[G14]], double** [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load double*, double** [[_TMP1]], align 4 +// CHECK3-NEXT: store double* [[G1]], double** [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: store double 1.000000e+00, double* [[G3]], align 8 -// CHECK3-NEXT: [[TMP14:%.*]] = load double*, double** [[_TMP5]], align 4 -// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP14]], align 4 -// CHECK3-NEXT: store i32 3, i32* [[SVAR6]], align 4 -// CHECK3-NEXT: store float 4.000000e+00, float* [[SFVAR7]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double* [[G3]], double** [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP17:%.*]] = load double*, double** [[_TMP5]], align 4 -// CHECK3-NEXT: store double* [[TMP17]], double** [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store i32* [[SVAR6]], i32** [[TMP18]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store float* [[SFVAR7]], float** [[TMP19]], align 4 +// CHECK3-NEXT: store double 1.000000e+00, double* [[G]], align 8 +// CHECK3-NEXT: [[TMP19:%.*]] = load double*, double** [[_TMP3]], align 4 +// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP19]], align 4 +// CHECK3-NEXT: store i32 3, i32* [[SVAR]], align 4 +// CHECK3-NEXT: store float 4.000000e+00, float* [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G]], double** [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load double*, double** [[_TMP3]], align 4 +// CHECK3-NEXT: store double* [[TMP22]], double** [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SVAR]], i32** [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[SFVAR]], float** [[TMP24]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK3-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP23:%.*]] = load double, double* [[G3]], align 8 -// CHECK3-NEXT: store volatile double [[TMP23]], double* [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP24:%.*]] = load double*, double** [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load double, double* [[TMP24]], align 4 -// CHECK3-NEXT: store volatile double [[TMP25]], double* [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[SVAR6]], align 4 -// CHECK3-NEXT: store i32 [[TMP26]], i32* [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load float, float* [[SFVAR7]], align 4 -// CHECK3-NEXT: store float [[TMP27]], float* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load double, double* [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP28]], double* [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP29:%.*]] = load double*, double** [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load double, double* [[TMP29]], align 4 +// CHECK3-NEXT: store volatile double [[TMP30]], double* [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP31]], i32* [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load float, float* [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP32]], float* [[TMP8]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -614,6 +630,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 @@ -625,21 +642,27 @@ // CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK9-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]], [2 x i32]* [[TMP0]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP3]], i32* [[CONV1]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S* [[TMP7]], %struct.S** [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP8]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -648,33 +671,35 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca %struct.S*, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: store %struct.S* [[TMP5]], %struct.S** [[_TMP1]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK9-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S* [[TMP11]], %struct.S** [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -684,108 +709,108 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP6:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store %struct.S* [[VAR6]], %struct.S** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK9-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[ARRAYIDX11]] to i8* -// CHECK9-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[TMP17]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP21]], i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: [[TMP25:%.*]] = bitcast %struct.S* [[ARRAYIDX6]] to i8* +// CHECK9-NEXT: [[TMP26:%.*]] = bitcast %struct.S* [[TMP23]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP25]], i8* align 4 [[TMP26]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK9-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP29]]) +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK9-NEXT: br i1 [[TMP31]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP26]], i32* [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK9-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP27]], i8* align 4 [[TMP28]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP29:%.*]] = bitcast [2 x %struct.S]* [[S_ARR5]] to %struct.S* -// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN13]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN13]], [[TMP30]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP32]], i32* [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK9-NEXT: [[TMP34:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP35:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN8]], [[TMP36]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP29]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN13]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[TMP31:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK9-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP31]], i8* align 4 [[TMP32]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP35]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[TMP37:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK9-NEXT: [[TMP38:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP37]], i8* align 4 [[TMP38]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP30]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done14: -// CHECK9-NEXT: [[TMP33:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[TMP6]] to i8* -// CHECK9-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[TMP33]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK9-NEXT: store i32 [[TMP36]], i32* [[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP39:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = bitcast %struct.S* [[TMP12]] to i8* +// CHECK9-NEXT: [[TMP41:%.*]] = bitcast %struct.S* [[TMP39]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP40]], i8* align 4 [[TMP41]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP42]], i32* [[TMP10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN15:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN15]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP37]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP43]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN15]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE16:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done16: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -955,6 +980,7 @@ // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 @@ -964,20 +990,25 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK9-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[CONV]], [2 x i32]* [[TMP0]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP3]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [2 x %struct.S.0]* [[TMP1]], [2 x %struct.S.0]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S.0* [[TMP7]], %struct.S.0** [[TMP6]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -986,30 +1017,32 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca %struct.S.0*, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[_TMP1]], align 8 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP7]], align 8 +// CHECK9-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S.0* [[TMP9]], %struct.S.0** [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -1019,106 +1052,106 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP1]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store %struct.S.0* [[VAR6]], %struct.S.0** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP1]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i64 0, i64 [[IDXPROM9]] -// CHECK9-NEXT: [[TMP18:%.*]] = bitcast %struct.S.0* [[ARRAYIDX10]] to i8* -// CHECK9-NEXT: [[TMP19:%.*]] = bitcast %struct.S.0* [[TMP16]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP19]], i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[ARRAYIDX6]] to i8* +// CHECK9-NEXT: [[TMP24:%.*]] = bitcast %struct.S.0* [[TMP21]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP23]], i8* align 4 [[TMP24]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK9-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP25]], i32* [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK9-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP28:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR5]] to %struct.S.0* -// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN12]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN12]], [[TMP29]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP30]], i32* [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK9-NEXT: [[TMP32:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP31]], i8* align 4 [[TMP32]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP33:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR]] to %struct.S.0* +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN8]], [[TMP34]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP28]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[TMP30:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK9-NEXT: [[TMP31:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP33]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[TMP35:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK9-NEXT: [[TMP36:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP29]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done13: -// CHECK9-NEXT: [[TMP32:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP33:%.*]] = bitcast %struct.S.0* [[TMP5]] to i8* -// CHECK9-NEXT: [[TMP34:%.*]] = bitcast %struct.S.0* [[TMP32]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP34]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP37:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP38:%.*]] = bitcast %struct.S.0* [[TMP10]] to i8* +// CHECK9-NEXT: [[TMP39:%.*]] = bitcast %struct.S.0* [[TMP37]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN14]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP35]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP40]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done15: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -1313,6 +1346,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK11-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 @@ -1322,21 +1356,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 // CHECK11-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[T_VAR_ADDR]], [2 x i32]* [[TMP0]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP3]], i32* [[SVAR_ADDR]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[T_VAR_ADDR]], i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP7]], %struct.S** [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32* [[SVAR_ADDR]], i32** [[TMP8]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1345,33 +1385,35 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP7:%.*]] = alloca %struct.S*, align 4 -// CHECK11-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca %struct.S*, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: store %struct.S* [[TMP5]], %struct.S** [[_TMP1]], align 4 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP11]], %struct.S** [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1381,106 +1423,106 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP6:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK11-NEXT: store %struct.S* [[VAR6]], %struct.S** [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i32 0, i32 [[TMP16]] -// CHECK11-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 [[TMP18]] -// CHECK11-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[ARRAYIDX10]] to i8* -// CHECK11-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[TMP17]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP22]] +// CHECK11-NEXT: store i32 [[TMP21]], i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP24]] +// CHECK11-NEXT: [[TMP25:%.*]] = bitcast %struct.S* [[ARRAYIDX5]] to i8* +// CHECK11-NEXT: [[TMP26:%.*]] = bitcast %struct.S* [[TMP23]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP25]], i8* align 4 [[TMP26]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK11-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK11-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP29]]) +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK11-NEXT: store i32 [[TMP26]], i32* [[TMP0]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK11-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP27]], i8* align 4 [[TMP28]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP29:%.*]] = bitcast [2 x %struct.S]* [[S_ARR5]] to %struct.S* -// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN12]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN12]], [[TMP30]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP32]], i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK11-NEXT: [[TMP34:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP35:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN7]], [[TMP36]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP29]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[TMP31:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK11-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP31]], i8* align 4 [[TMP32]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP35]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[TMP37:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK11-NEXT: [[TMP38:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP37]], i8* align 4 [[TMP38]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP30]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done13: -// CHECK11-NEXT: [[TMP33:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[TMP6]] to i8* -// CHECK11-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[TMP33]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK11-NEXT: store i32 [[TMP36]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done8: +// CHECK11-NEXT: [[TMP39:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = bitcast %struct.S* [[TMP12]] to i8* +// CHECK11-NEXT: [[TMP41:%.*]] = bitcast %struct.S* [[TMP39]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP40]], i8* align 4 [[TMP41]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP42]], i32* [[TMP10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN14]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN9]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP37]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP43]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done15: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done10: // CHECK11-NEXT: ret void // // @@ -1649,6 +1691,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 @@ -1657,20 +1700,25 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 // CHECK11-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[T_VAR_ADDR]], [2 x i32]* [[TMP0]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP3]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[T_VAR_ADDR]], i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x %struct.S.0]* [[TMP1]], [2 x %struct.S.0]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S.0* [[TMP7]], %struct.S.0** [[TMP6]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1679,30 +1727,32 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca %struct.S.0*, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[_TMP1]], align 4 +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP7]], align 4 +// CHECK11-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S.0* [[TMP9]], %struct.S.0** [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1712,104 +1762,104 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP1]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK11-NEXT: store %struct.S.0* [[VAR6]], %struct.S.0** [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP1]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i32 0, i32 [[TMP15]] -// CHECK11-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 [[TMP17]] -// CHECK11-NEXT: [[TMP18:%.*]] = bitcast %struct.S.0* [[ARRAYIDX9]] to i8* -// CHECK11-NEXT: [[TMP19:%.*]] = bitcast %struct.S.0* [[TMP16]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP20]] +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP22]] +// CHECK11-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[ARRAYIDX5]] to i8* +// CHECK11-NEXT: [[TMP24:%.*]] = bitcast %struct.S.0* [[TMP21]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP23]], i8* align 4 [[TMP24]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK11-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK11-NEXT: store i32 [[TMP25]], i32* [[TMP0]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK11-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP28:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR5]] to %struct.S.0* -// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN11]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN11]], [[TMP29]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP30]], i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK11-NEXT: [[TMP32:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP31]], i8* align 4 [[TMP32]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP33:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR]] to %struct.S.0* +// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN7]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN7]], [[TMP34]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP28]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[TMP30:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK11-NEXT: [[TMP31:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP33]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[TMP35:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK11-NEXT: [[TMP36:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP29]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done12: -// CHECK11-NEXT: [[TMP32:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = bitcast %struct.S.0* [[TMP5]] to i8* -// CHECK11-NEXT: [[TMP34:%.*]] = bitcast %struct.S.0* [[TMP32]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP34]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done8: +// CHECK11-NEXT: [[TMP37:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = bitcast %struct.S.0* [[TMP10]] to i8* +// CHECK11-NEXT: [[TMP39:%.*]] = bitcast %struct.S.0* [[TMP37]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN9]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP35]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP40]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done10: // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp @@ -801,24 +801,30 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: store double* [[A]], double** [[A_ADDR]], align 8 // CHECK1-NEXT: store double* [[B]], double** [[B_ADDR]], align 8 // CHECK1-NEXT: store double* [[C]], double** [[C_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store double** [[A_ADDR]], double*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store double** [[B_ADDR]], double*** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[C_ADDR]], double*** [[TMP3]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -828,93 +834,107 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double**, double*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[TMP2]], i32** [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[TMP4]], double*** [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store double** [[TMP6]], double*** [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store double** [[TMP8]], double*** [[TMP31]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK1-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -925,110 +945,114 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK1-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM10]] +// CHECK1-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP31:%.*]] = load double, double* [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load double*, double** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP32]], i64 [[IDXPROM7]] +// CHECK1-NEXT: [[TMP34:%.*]] = load double, double* [[ARRAYIDX8]], align 8 +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK1-NEXT: [[TMP35:%.*]] = load double*, double** [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP35]], i64 [[IDXPROM10]] // CHECK1-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[I4]], i32** [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 8 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store double** [[TMP8]], double*** [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[I4]], i32** [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store double** [[TMP10]], double*** [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[TMP12]], double*** [[TMP40]], align 8 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK1-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP43]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -1041,24 +1065,30 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: store double* [[A]], double** [[A_ADDR]], align 8 // CHECK1-NEXT: store double* [[B]], double** [[B_ADDR]], align 8 // CHECK1-NEXT: store double* [[C]], double** [[C_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[CONV]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store double** [[A_ADDR]], double*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store double** [[B_ADDR]], double*** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[C_ADDR]], double*** [[TMP3]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1068,93 +1098,107 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double**, double*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[TMP2]], i32** [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[TMP4]], double*** [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store double** [[TMP6]], double*** [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store double** [[TMP8]], double*** [[TMP31]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK1-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1165,110 +1209,114 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK1-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM10]] +// CHECK1-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP31:%.*]] = load double, double* [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load double*, double** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP32]], i64 [[IDXPROM7]] +// CHECK1-NEXT: [[TMP34:%.*]] = load double, double* [[ARRAYIDX8]], align 8 +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK1-NEXT: [[TMP35:%.*]] = load double*, double** [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP35]], i64 [[IDXPROM10]] // CHECK1-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[I4]], i32** [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 8 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE0_clEv"(%class.anon.1* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store double** [[TMP8]], double*** [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[I4]], i32** [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store double** [[TMP10]], double*** [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[TMP12]], double*** [[TMP40]], align 8 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE0_clEv"(%class.anon.4* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK1-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP43]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -1282,6 +1330,7 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: store i64 [[CH]], i64* [[CH_ADDR]], align 8 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: store double* [[A]], double** [[A_ADDR]], align 8 @@ -1289,20 +1338,26 @@ // CHECK1-NEXT: store double* [[C]], double** [[C_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[CH_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[CONV]], i32* [[CONV1]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[CONV1]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store double** [[A_ADDR]], double*** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[B_ADDR]], double*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store double** [[C_ADDR]], double*** [[TMP4]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1312,120 +1367,134 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK1-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK1-NEXT: store i64 [[TMP26]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK1-NEXT: store i64 [[TMP28]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[TMP4]], i32** [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[TMP6]], double*** [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store double** [[TMP8]], double*** [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store double** [[TMP10]], double*** [[TMP34]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK1: cond.true10: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END12:%.*]] // CHECK1: cond.false11: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END12]] // CHECK1: cond.end12: -// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE10]] ], [ [[TMP32]], [[COND_FALSE11]] ] +// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE10]] ], [ [[TMP44]], [[COND_FALSE11]] ] // CHECK1-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP45]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK1-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1436,110 +1505,114 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_2:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_7:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK1-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM10]] +// CHECK1-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP31:%.*]] = load double, double* [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load double*, double** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP32]], i64 [[IDXPROM7]] +// CHECK1-NEXT: [[TMP34:%.*]] = load double, double* [[ARRAYIDX8]], align 8 +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK1-NEXT: [[TMP35:%.*]] = load double*, double** [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP35]], i64 [[IDXPROM10]] // CHECK1-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[I4]], i32** [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 8 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE1_clEv"(%class.anon.2* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], %class.anon.7* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store double** [[TMP8]], double*** [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], %class.anon.7* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[I4]], i32** [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], %class.anon.7* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store double** [[TMP10]], double*** [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], %class.anon.7* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[TMP12]], double*** [[TMP40]], align 8 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE1_clEv"(%class.anon.7* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK1-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP43]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -1552,24 +1625,30 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: store double* [[A]], double** [[A_ADDR]], align 8 // CHECK1-NEXT: store double* [[B]], double** [[B_ADDR]], align 8 // CHECK1-NEXT: store double* [[C]], double** [[C_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* [[CONV]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store double** [[A_ADDR]], double*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store double** [[B_ADDR]], double*** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[C_ADDR]], double*** [[TMP3]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1579,93 +1658,107 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double**, double*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[TMP2]], i32** [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[TMP4]], double*** [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store double** [[TMP6]], double*** [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store double** [[TMP8]], double*** [[TMP31]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK1-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1676,110 +1769,114 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_3:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_10:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK1-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM10]] +// CHECK1-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP31:%.*]] = load double, double* [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load double*, double** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP32]], i64 [[IDXPROM7]] +// CHECK1-NEXT: [[TMP34:%.*]] = load double, double* [[ARRAYIDX8]], align 8 +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK1-NEXT: [[TMP35:%.*]] = load double*, double** [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP35]], i64 [[IDXPROM10]] // CHECK1-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[I4]], i32** [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 8 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE2_clEv"(%class.anon.3* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], %class.anon.10* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store double** [[TMP8]], double*** [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], %class.anon.10* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[I4]], i32** [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], %class.anon.10* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store double** [[TMP10]], double*** [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], %class.anon.10* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[TMP12]], double*** [[TMP40]], align 8 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE2_clEv"(%class.anon.10* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK1-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP43]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -1793,6 +1890,7 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 // CHECK1-NEXT: store i64 [[CH]], i64* [[CH_ADDR]], align 8 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: store double* [[A]], double** [[A_ADDR]], align 8 @@ -1800,20 +1898,26 @@ // CHECK1-NEXT: store double* [[C]], double** [[C_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[CH_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* [[CONV]], i32* [[CONV1]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[CONV1]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store double** [[A_ADDR]], double*** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[B_ADDR]], double*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store double** [[C_ADDR]], double*** [[TMP4]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.11*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -1824,103 +1928,115 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK1-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], %struct.anon.11* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i64 [[TMP24]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK1-NEXT: store i64 [[TMP26]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK1-NEXT: store i64 [[TMP28]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[TMP4]], i32** [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[TMP6]], double*** [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store double** [[TMP8]], double*** [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store double** [[TMP10]], double*** [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: store i32 [[TMP36]], i32* [[TMP35]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.12*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) +// CHECK1-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP40]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -1930,133 +2046,138 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_4:%.*]], align 8 +// CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_13:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 8 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32 -// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP23]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP25]] to i32 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP24]], [[CONV6]] +// CHECK1-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP26]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM12]] -// CHECK1-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX13]], align 8 -// CHECK1-NEXT: [[ADD14:%.*]] = fadd double [[TMP25]], [[TMP28]] -// CHECK1-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM15]] -// CHECK1-NEXT: store double [[ADD14]], double* [[ARRAYIDX16]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[I6]], i32** [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store double** [[TMP2]], double*** [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store double** [[TMP3]], double*** [[TMP34]], align 8 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE3_clEv"(%class.anon.4* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load double*, double** [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP34]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP36:%.*]] = load double, double* [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load double*, double** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP38]] to i64 +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP37]], i64 [[IDXPROM11]] +// CHECK1-NEXT: [[TMP39:%.*]] = load double, double* [[ARRAYIDX12]], align 8 +// CHECK1-NEXT: [[ADD13:%.*]] = fadd double [[TMP36]], [[TMP39]] +// CHECK1-NEXT: [[TMP40:%.*]] = load double*, double** [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK1-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP41]] to i64 +// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[TMP40]], i64 [[IDXPROM14]] +// CHECK1-NEXT: store double [[ADD13]], double* [[ARRAYIDX15]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], %class.anon.13* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store double** [[TMP8]], double*** [[TMP42]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], %class.anon.13* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[I5]], i32** [[TMP43]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], %class.anon.13* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store double** [[TMP10]], double*** [[TMP44]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], %class.anon.13* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[TMP12]], double*** [[TMP45]], align 8 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE3_clEv"(%class.anon.13* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK1-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP46]], 1 +// CHECK1-NEXT: store i32 [[ADD16]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] -// CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] -// CHECK1-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP47]], [[TMP48]] +// CHECK1-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP49]], [[TMP50]] +// CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP41]]) +// CHECK1-NEXT: [[TMP51:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[TMP51]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP52]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -2069,24 +2190,30 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: store double* [[A]], double** [[A_ADDR]], align 8 // CHECK1-NEXT: store double* [[B]], double** [[B_ADDR]], align 8 // CHECK1-NEXT: store double* [[C]], double** [[C_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32* [[CONV]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store double** [[A_ADDR]], double*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store double** [[B_ADDR]], double*** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[C_ADDR]], double*** [[TMP3]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2096,93 +2223,107 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double**, double*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[TMP2]], i32** [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[TMP4]], double*** [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store double** [[TMP6]], double*** [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store double** [[TMP8]], double*** [[TMP31]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.15*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK1-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.15* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.15*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2193,99 +2334,103 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_5:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_16:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store %struct.anon.15* [[__CONTEXT]], %struct.anon.15** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.15*, %struct.anon.15** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], %struct.anon.15* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP24]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP21]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP25]] to i64 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[TMP24]], i64 [[IDXPROM6]] -// CHECK1-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX7]], align 8, !llvm.access.group !12 -// CHECK1-NEXT: [[ADD8:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK1-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP27]], i64 [[IDXPROM9]] +// CHECK1-NEXT: [[TMP30:%.*]] = load double*, double** [[TMP10]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP30]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP32:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP33:%.*]] = load double*, double** [[TMP12]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[TMP33]], i64 [[IDXPROM6]] +// CHECK1-NEXT: [[TMP35:%.*]] = load double, double* [[ARRAYIDX7]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[ADD8:%.*]] = fadd double [[TMP32]], [[TMP35]] +// CHECK1-NEXT: [[TMP36:%.*]] = load double*, double** [[TMP8]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP36]], i64 [[IDXPROM9]] // CHECK1-NEXT: store double [[ADD8]], double* [[ARRAYIDX10]], align 8, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP29]], align 8, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[I4]], i32** [[TMP30]], align 8, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store double** [[TMP2]], double*** [[TMP31]], align 8, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store double** [[TMP3]], double*** [[TMP32]], align 8, !llvm.access.group !12 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE4_clEv"(%class.anon.5* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !12 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], %class.anon.16* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store double** [[TMP8]], double*** [[TMP38]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], %class.anon.16* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[I4]], i32** [[TMP39]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], %class.anon.16* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store double** [[TMP10]], double*** [[TMP40]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], %class.anon.16* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[TMP12]], double*** [[TMP41]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE4_clEv"(%class.anon.16* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !12 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP42]], 1 // CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -2306,6 +2451,7 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 8 // CHECK1-NEXT: store i64 [[CH]], i64* [[CH_ADDR]], align 8 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: store double* [[A]], double** [[A_ADDR]], align 8 @@ -2313,20 +2459,26 @@ // CHECK1-NEXT: store double* [[C]], double** [[C_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[CH_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32* [[CONV]], i32* [[CONV1]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[CONV1]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store double** [[A_ADDR]], double*** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[B_ADDR]], double*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store double** [[C_ADDR]], double*** [[TMP4]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.17*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.17* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.17*, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -2337,103 +2489,115 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK1-NEXT: store %struct.anon.17* [[__CONTEXT]], %struct.anon.17** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.17*, %struct.anon.17** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], %struct.anon.17* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**, i64)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i64 [[TMP24]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK1-NEXT: store i64 [[TMP26]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK1-NEXT: store i64 [[TMP28]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[TMP4]], i32** [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[TMP6]], double*** [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store double** [[TMP8]], double*** [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store double** [[TMP10]], double*** [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: store i32 [[TMP36]], i32* [[TMP35]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.18*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) +// CHECK1-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP40]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.18* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.18*, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2443,104 +2607,109 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_6:%.*]], align 8 +// CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_20:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store %struct.anon.18* [[__CONTEXT]], %struct.anon.18** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.18*, %struct.anon.18** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], %struct.anon.18* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 8 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32 -// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP16]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP30]], [[TMP31]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP22]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP26]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds double, double* [[TMP25]], i64 [[IDXPROM8]] -// CHECK1-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX9]], align 8, !llvm.access.group !15 -// CHECK1-NEXT: [[ADD10:%.*]] = fadd double [[TMP24]], [[TMP27]] -// CHECK1-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP28]], i64 [[IDXPROM11]] -// CHECK1-NEXT: store double [[ADD10]], double* [[ARRAYIDX12]], align 8, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP30]], align 8, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[I6]], i32** [[TMP31]], align 8, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store double** [[TMP2]], double*** [[TMP32]], align 8, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store double** [[TMP3]], double*** [[TMP33]], align 8, !llvm.access.group !15 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE5_clEv"(%class.anon.6* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !15 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP33:%.*]] = load double*, double** [[TMP10]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP33]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP35:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP36:%.*]] = load double*, double** [[TMP12]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP36]], i64 [[IDXPROM7]] +// CHECK1-NEXT: [[TMP38:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP35]], [[TMP38]] +// CHECK1-NEXT: [[TMP39:%.*]] = load double*, double** [[TMP8]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP40]] to i64 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP39]], i64 [[IDXPROM10]] +// CHECK1-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[CLASS_ANON_20]], %class.anon.20* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store double** [[TMP8]], double*** [[TMP41]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[CLASS_ANON_20]], %class.anon.20* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[I5]], i32** [[TMP42]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[CLASS_ANON_20]], %class.anon.20* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store double** [[TMP10]], double*** [[TMP43]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[CLASS_ANON_20]], %class.anon.20* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[TMP12]], double*** [[TMP44]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE5_clEv"(%class.anon.20* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP34]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP45]], 1 +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -2593,23 +2762,29 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store double* [[A]], double** [[A_ADDR]], align 4 // CHECK3-NEXT: store double* [[B]], double** [[B_ADDR]], align 4 // CHECK3-NEXT: store double* [[C]], double** [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[N_ADDR]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[N_ADDR]], i32** [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store double** [[A_ADDR]], double*** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store double** [[B_ADDR]], double*** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[C_ADDR]], double*** [[TMP3]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2619,91 +2794,105 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load double**, double*** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[TMP4]], double*** [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store double** [[TMP6]], double*** [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store double** [[TMP8]], double*** [[TMP29]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK3-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2714,105 +2903,109 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] +// CHECK3-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP29]], i32 [[TMP30]] +// CHECK3-NEXT: [[TMP31:%.*]] = load double, double* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load double*, double** [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP32]], i32 [[TMP33]] +// CHECK3-NEXT: [[TMP34:%.*]] = load double, double* [[ARRAYIDX6]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load double*, double** [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP35]], i32 [[TMP36]] // CHECK3-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store i32* [[I3]], i32** [[TMP29]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 4 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double** [[TMP8]], double*** [[TMP37]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[I3]], i32** [[TMP38]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store double** [[TMP10]], double*** [[TMP39]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[TMP12]], double*** [[TMP40]], align 4 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK3-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP43]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -2825,23 +3018,29 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store double* [[A]], double** [[A_ADDR]], align 4 // CHECK3-NEXT: store double* [[B]], double** [[B_ADDR]], align 4 // CHECK3-NEXT: store double* [[C]], double** [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[N_ADDR]], i32** [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store double** [[A_ADDR]], double*** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store double** [[B_ADDR]], double*** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[C_ADDR]], double*** [[TMP3]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2851,91 +3050,105 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load double**, double*** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[TMP4]], double*** [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store double** [[TMP6]], double*** [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store double** [[TMP8]], double*** [[TMP29]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK3-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2946,105 +3159,109 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_4:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] +// CHECK3-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP29]], i32 [[TMP30]] +// CHECK3-NEXT: [[TMP31:%.*]] = load double, double* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load double*, double** [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP32]], i32 [[TMP33]] +// CHECK3-NEXT: [[TMP34:%.*]] = load double, double* [[ARRAYIDX6]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load double*, double** [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP35]], i32 [[TMP36]] // CHECK3-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store i32* [[I3]], i32** [[TMP29]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 4 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE0_clEv"(%class.anon.1* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double** [[TMP8]], double*** [[TMP37]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[I3]], i32** [[TMP38]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store double** [[TMP10]], double*** [[TMP39]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[TMP12]], double*** [[TMP40]], align 4 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE0_clEv"(%class.anon.4* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK3-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP43]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -3058,25 +3275,32 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store i32 [[CH]], i32* [[CH_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store double* [[A]], double** [[A_ADDR]], align 4 // CHECK3-NEXT: store double* [[B]], double** [[B_ADDR]], align 4 // CHECK3-NEXT: store double* [[C]], double** [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[CH_ADDR]], i32* [[N_ADDR]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[CH_ADDR]], i32** [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[N_ADDR]], i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store double** [[A_ADDR]], double*** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[B_ADDR]], double*** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store double** [[C_ADDR]], double*** [[TMP4]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3086,118 +3310,132 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK3-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[TMP4]], i32** [[TMP29]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[TMP6]], double*** [[TMP30]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store double** [[TMP8]], double*** [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store double** [[TMP10]], double*** [[TMP32]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] // CHECK3-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK3: cond.true10: -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END12:%.*]] // CHECK3: cond.false11: -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END12]] // CHECK3: cond.end12: -// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE10]] ], [ [[TMP30]], [[COND_FALSE11]] ] +// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE10]] ], [ [[TMP42]], [[COND_FALSE11]] ] // CHECK3-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP43]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP45]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3208,105 +3446,109 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_2:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_7:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] +// CHECK3-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP29]], i32 [[TMP30]] +// CHECK3-NEXT: [[TMP31:%.*]] = load double, double* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load double*, double** [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP32]], i32 [[TMP33]] +// CHECK3-NEXT: [[TMP34:%.*]] = load double, double* [[ARRAYIDX6]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load double*, double** [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP35]], i32 [[TMP36]] // CHECK3-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store i32* [[I3]], i32** [[TMP29]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 4 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE1_clEv"(%class.anon.2* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], %class.anon.7* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double** [[TMP8]], double*** [[TMP37]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], %class.anon.7* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[I3]], i32** [[TMP38]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], %class.anon.7* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store double** [[TMP10]], double*** [[TMP39]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], %class.anon.7* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[TMP12]], double*** [[TMP40]], align 4 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE1_clEv"(%class.anon.7* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK3-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP43]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -3319,23 +3561,29 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store double* [[A]], double** [[A_ADDR]], align 4 // CHECK3-NEXT: store double* [[B]], double** [[B_ADDR]], align 4 // CHECK3-NEXT: store double* [[C]], double** [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[N_ADDR]], i32** [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store double** [[A_ADDR]], double*** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store double** [[B_ADDR]], double*** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[C_ADDR]], double*** [[TMP3]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3345,91 +3593,105 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load double**, double*** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[TMP4]], double*** [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store double** [[TMP6]], double*** [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store double** [[TMP8]], double*** [[TMP29]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK3-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3440,105 +3702,109 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_3:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_10:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] +// CHECK3-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP29]], i32 [[TMP30]] +// CHECK3-NEXT: [[TMP31:%.*]] = load double, double* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load double*, double** [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP32]], i32 [[TMP33]] +// CHECK3-NEXT: [[TMP34:%.*]] = load double, double* [[ARRAYIDX6]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load double*, double** [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP35]], i32 [[TMP36]] // CHECK3-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store i32* [[I3]], i32** [[TMP29]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 4 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE2_clEv"(%class.anon.3* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], %class.anon.10* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double** [[TMP8]], double*** [[TMP37]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], %class.anon.10* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[I3]], i32** [[TMP38]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], %class.anon.10* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store double** [[TMP10]], double*** [[TMP39]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], %class.anon.10* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[TMP12]], double*** [[TMP40]], align 4 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE2_clEv"(%class.anon.10* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK3-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP43]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -3552,25 +3818,32 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK3-NEXT: store i32 [[CH]], i32* [[CH_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store double* [[A]], double** [[A_ADDR]], align 4 // CHECK3-NEXT: store double* [[B]], double** [[B_ADDR]], align 4 // CHECK3-NEXT: store double* [[C]], double** [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* [[CH_ADDR]], i32* [[N_ADDR]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[CH_ADDR]], i32** [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[N_ADDR]], i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store double** [[A_ADDR]], double*** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[B_ADDR]], double*** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store double** [[C_ADDR]], double*** [[TMP4]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.11*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -3581,100 +3854,113 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK3-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], %struct.anon.11* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i32 [[TMP22]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[TMP4]], i32** [[TMP29]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[TMP6]], double*** [[TMP30]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store double** [[TMP8]], double*** [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store double** [[TMP10]], double*** [[TMP32]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: store i32 [[TMP34]], i32* [[TMP33]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.12*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) +// CHECK3-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -3685,124 +3971,130 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_4:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_13:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP23]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] // CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] -// CHECK3-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX8]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = fadd double [[TMP25]], [[TMP28]] -// CHECK3-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP29]], i32 [[TMP30]] +// CHECK3-NEXT: [[TMP34:%.*]] = load double*, double** [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP34]], i32 [[TMP35]] +// CHECK3-NEXT: [[TMP36:%.*]] = load double, double* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load double*, double** [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP37]], i32 [[TMP38]] +// CHECK3-NEXT: [[TMP39:%.*]] = load double, double* [[ARRAYIDX8]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = fadd double [[TMP36]], [[TMP39]] +// CHECK3-NEXT: [[TMP40:%.*]] = load double*, double** [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP40]], i32 [[TMP41]] // CHECK3-NEXT: store double [[ADD9]], double* [[ARRAYIDX10]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double** [[TMP1]], double*** [[TMP31]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store i32* [[I4]], i32** [[TMP32]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store double** [[TMP2]], double*** [[TMP33]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store double** [[TMP3]], double*** [[TMP34]], align 4 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE3_clEv"(%class.anon.4* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) +// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], %class.anon.13* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double** [[TMP8]], double*** [[TMP42]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], %class.anon.13* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[I4]], i32** [[TMP43]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], %class.anon.13* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store double** [[TMP10]], double*** [[TMP44]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], %class.anon.13* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[TMP12]], double*** [[TMP45]], align 4 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE3_clEv"(%class.anon.13* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK3-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP46]], 1 // CHECK3-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK3-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP47]], [[TMP48]] // CHECK3-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] +// CHECK3-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP49]], [[TMP50]] // CHECK3-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP41]]) +// CHECK3-NEXT: [[TMP51:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP52:%.*]] = load i32, i32* [[TMP51]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP52]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -3815,23 +4107,29 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store double* [[A]], double** [[A_ADDR]], align 4 // CHECK3-NEXT: store double* [[B]], double** [[B_ADDR]], align 4 // CHECK3-NEXT: store double* [[C]], double** [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[N_ADDR]], i32** [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store double** [[A_ADDR]], double*** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store double** [[B_ADDR]], double*** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[C_ADDR]], double*** [[TMP3]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3841,91 +4139,105 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load double**, double*** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[TMP4]], double*** [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store double** [[TMP6]], double*** [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store double** [[TMP8]], double*** [[TMP29]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.15*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK3-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.15* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.15*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3936,94 +4248,98 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_5:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_16:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store %struct.anon.15* [[__CONTEXT]], %struct.anon.15** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.15*, %struct.anon.15** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], %struct.anon.15* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP24]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP21]], i32 [[TMP22]] -// CHECK3-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX5]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[ADD6:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK3-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[TMP27]], i32 [[TMP28]] +// CHECK3-NEXT: [[TMP30:%.*]] = load double*, double** [[TMP10]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP30]], i32 [[TMP31]] +// CHECK3-NEXT: [[TMP32:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP33:%.*]] = load double*, double** [[TMP12]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[TMP33]], i32 [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load double, double* [[ARRAYIDX5]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[ADD6:%.*]] = fadd double [[TMP32]], [[TMP35]] +// CHECK3-NEXT: [[TMP36:%.*]] = load double*, double** [[TMP8]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[TMP36]], i32 [[TMP37]] // CHECK3-NEXT: store double [[ADD6]], double* [[ARRAYIDX7]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double** [[TMP1]], double*** [[TMP29]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store i32* [[I3]], i32** [[TMP30]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store double** [[TMP2]], double*** [[TMP31]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store double** [[TMP3]], double*** [[TMP32]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE4_clEv"(%class.anon.5* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !13 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], %class.anon.16* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double** [[TMP8]], double*** [[TMP38]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], %class.anon.16* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[I3]], i32** [[TMP39]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], %class.anon.16* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store double** [[TMP10]], double*** [[TMP40]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], %class.anon.16* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[TMP12]], double*** [[TMP41]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE4_clEv"(%class.anon.16* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !13 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP42]], 1 // CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK3: omp.inner.for.end: @@ -4044,25 +4360,32 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK3-NEXT: store i32 [[CH]], i32* [[CH_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store double* [[A]], double** [[A_ADDR]], align 4 // CHECK3-NEXT: store double* [[B]], double** [[B_ADDR]], align 4 // CHECK3-NEXT: store double* [[C]], double** [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32* [[CH_ADDR]], i32* [[N_ADDR]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[CH_ADDR]], i32** [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[N_ADDR]], i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store double** [[A_ADDR]], double*** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[B_ADDR]], double*** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store double** [[C_ADDR]], double*** [[TMP4]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.17*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.17* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.17*, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -4073,100 +4396,113 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK3-NEXT: store %struct.anon.17* [[__CONTEXT]], %struct.anon.17** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.17*, %struct.anon.17** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], %struct.anon.17* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**, i32)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i32 [[TMP22]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[TMP4]], i32** [[TMP29]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[TMP6]], double*** [[TMP30]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store double** [[TMP8]], double*** [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store double** [[TMP10]], double*** [[TMP32]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: store i32 [[TMP34]], i32* [[TMP33]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.18*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) +// CHECK3-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.18* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.18*, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -4177,96 +4513,102 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_6:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_19:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store %struct.anon.18* [[__CONTEXT]], %struct.anon.18** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.18*, %struct.anon.18** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], %struct.anon.18* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP16]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP29]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP22]], i32 [[TMP23]] -// CHECK3-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP25]], i32 [[TMP26]] -// CHECK3-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP24]], [[TMP27]] -// CHECK3-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP28]], i32 [[TMP29]] +// CHECK3-NEXT: [[TMP33:%.*]] = load double*, double** [[TMP10]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP33]], i32 [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP36:%.*]] = load double*, double** [[TMP12]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP36]], i32 [[TMP37]] +// CHECK3-NEXT: [[TMP38:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP35]], [[TMP38]] +// CHECK3-NEXT: [[TMP39:%.*]] = load double*, double** [[TMP8]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP39]], i32 [[TMP40]] // CHECK3-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double** [[TMP1]], double*** [[TMP30]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store i32* [[I4]], i32** [[TMP31]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store double** [[TMP2]], double*** [[TMP32]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store double** [[TMP3]], double*** [[TMP33]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE5_clEv"(%class.anon.6* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !16 +// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[CLASS_ANON_19]], %class.anon.19* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double** [[TMP8]], double*** [[TMP41]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[CLASS_ANON_19]], %class.anon.19* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[I4]], i32** [[TMP42]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[CLASS_ANON_19]], %class.anon.19* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store double** [[TMP10]], double*** [[TMP43]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[CLASS_ANON_19]], %class.anon.19* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[TMP12]], double*** [[TMP44]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE5_clEv"(%class.anon.19* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !16 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP34]], 1 +// CHECK3-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP45]], 1 // CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK3: omp.inner.for.end: @@ -4810,24 +5152,30 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double*, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double*, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store double* [[A]], double** [[A_ADDR]], align 8 // CHECK9-NEXT: store double* [[B]], double** [[B_ADDR]], align 8 // CHECK9-NEXT: store double* [[C]], double** [[C_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store double** [[A_ADDR]], double*** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store double** [[B_ADDR]], double*** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store double** [[C_ADDR]], double*** [[TMP3]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4837,93 +5185,107 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load double**, double*** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP2]], i32** [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store double** [[TMP4]], double*** [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store double** [[TMP6]], double*** [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store double** [[TMP8]], double*** [[TMP31]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4936,98 +5298,102 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8 -// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load double, double* [[ARRAYIDX]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load double*, double** [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load double, double* [[ARRAYIDX8]], align 8 +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load double*, double** [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -5040,24 +5406,30 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double*, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double*, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store double* [[A]], double** [[A_ADDR]], align 8 // CHECK9-NEXT: store double* [[B]], double** [[B_ADDR]], align 8 // CHECK9-NEXT: store double* [[C]], double** [[C_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[CONV]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store double** [[A_ADDR]], double*** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store double** [[B_ADDR]], double*** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store double** [[C_ADDR]], double*** [[TMP3]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5067,93 +5439,107 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load double**, double*** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP2]], i32** [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store double** [[TMP4]], double*** [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store double** [[TMP6]], double*** [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store double** [[TMP8]], double*** [[TMP31]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5166,98 +5552,102 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8 -// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load double, double* [[ARRAYIDX]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load double*, double** [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load double, double* [[ARRAYIDX8]], align 8 +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load double*, double** [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -5271,6 +5661,7 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double*, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double*, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], i64* [[CH_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store double* [[A]], double** [[A_ADDR]], align 8 @@ -5278,20 +5669,26 @@ // CHECK9-NEXT: store double* [[C]], double** [[C_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[CH_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[CONV]], i32* [[CONV1]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store double** [[A_ADDR]], double*** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store double** [[B_ADDR]], double*** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store double** [[C_ADDR]], double*** [[TMP4]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5301,120 +5698,134 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK9-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP4]], i32** [[TMP31]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store double** [[TMP6]], double*** [[TMP32]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store double** [[TMP8]], double*** [[TMP33]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store double** [[TMP10]], double*** [[TMP34]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK9-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK9: cond.true10: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END12:%.*]] // CHECK9: cond.false11: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END12]] // CHECK9: cond.end12: -// CHECK9-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE10]] ], [ [[TMP32]], [[COND_FALSE11]] ] +// CHECK9-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE10]] ], [ [[TMP44]], [[COND_FALSE11]] ] // CHECK9-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP45]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5427,98 +5838,102 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8 -// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load double, double* [[ARRAYIDX]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load double*, double** [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load double, double* [[ARRAYIDX8]], align 8 +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load double*, double** [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -5531,24 +5946,30 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double*, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double*, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store double* [[A]], double** [[A_ADDR]], align 8 // CHECK9-NEXT: store double* [[B]], double** [[B_ADDR]], align 8 // CHECK9-NEXT: store double* [[C]], double** [[C_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* [[CONV]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store double** [[A_ADDR]], double*** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store double** [[B_ADDR]], double*** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store double** [[C_ADDR]], double*** [[TMP3]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5558,93 +5979,107 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load double**, double*** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP2]], i32** [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store double** [[TMP4]], double*** [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store double** [[TMP6]], double*** [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store double** [[TMP8]], double*** [[TMP31]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5657,98 +6092,102 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8 -// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load double, double* [[ARRAYIDX]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load double*, double** [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load double, double* [[ARRAYIDX8]], align 8 +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load double*, double** [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -5762,6 +6201,7 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double*, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double*, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], i64* [[CH_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store double* [[A]], double** [[A_ADDR]], align 8 @@ -5769,20 +6209,26 @@ // CHECK9-NEXT: store double* [[C]], double** [[C_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[CH_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* [[CONV]], i32* [[CONV1]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store double** [[A_ADDR]], double*** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store double** [[B_ADDR]], double*** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store double** [[C_ADDR]], double*** [[TMP4]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -5793,103 +6239,115 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i64 [[TMP24]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP4]], i32** [[TMP31]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store double** [[TMP6]], double*** [[TMP32]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store double** [[TMP8]], double*** [[TMP33]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store double** [[TMP10]], double*** [[TMP34]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP36]], i32* [[TMP35]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) +// CHECK9-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP40]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -5899,123 +6357,128 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 8 +// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32 -// CHECK9-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 +// CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP23]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP25]] to i32 +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP24]], [[CONV6]] +// CHECK9-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP26]] to i32 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK9-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] +// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8 -// CHECK9-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK9-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM12]] -// CHECK9-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX13]], align 8 -// CHECK9-NEXT: [[ADD14:%.*]] = fadd double [[TMP25]], [[TMP28]] -// CHECK9-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK9-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK9-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM15]] -// CHECK9-NEXT: store double [[ADD14]], double* [[ARRAYIDX16]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = load double*, double** [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP34]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP36:%.*]] = load double, double* [[ARRAYIDX]], align 8 +// CHECK9-NEXT: [[TMP37:%.*]] = load double*, double** [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP38]] to i64 +// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP37]], i64 [[IDXPROM11]] +// CHECK9-NEXT: [[TMP39:%.*]] = load double, double* [[ARRAYIDX12]], align 8 +// CHECK9-NEXT: [[ADD13:%.*]] = fadd double [[TMP36]], [[TMP39]] +// CHECK9-NEXT: [[TMP40:%.*]] = load double*, double** [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK9-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP41]] to i64 +// CHECK9-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[TMP40]], i64 [[IDXPROM14]] +// CHECK9-NEXT: store double [[ADD13]], double* [[ARRAYIDX15]], align 8 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK9-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP42]], 1 +// CHECK9-NEXT: store i32 [[ADD16]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] -// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK9-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK9-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] +// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP36]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP37]]) +// CHECK9-NEXT: [[TMP47:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP48:%.*]] = load i32, i32* [[TMP47]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP48]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -6028,24 +6491,30 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double*, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double*, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store double* [[A]], double** [[A_ADDR]], align 8 // CHECK9-NEXT: store double* [[B]], double** [[B_ADDR]], align 8 // CHECK9-NEXT: store double* [[C]], double** [[C_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32* [[CONV]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store double** [[A_ADDR]], double*** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store double** [[B_ADDR]], double*** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store double** [[C_ADDR]], double*** [[TMP3]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -6055,93 +6524,107 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load double**, double*** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP2]], i32** [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store double** [[TMP4]], double*** [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store double** [[TMP6]], double*** [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store double** [[TMP8]], double*** [[TMP31]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -6154,87 +6637,91 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP24]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !19 -// CHECK9-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !19 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP21]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !19 -// CHECK9-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !19 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 -// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP25]] to i64 -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[TMP24]], i64 [[IDXPROM6]] -// CHECK9-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX7]], align 8, !llvm.access.group !19 -// CHECK9-NEXT: [[ADD8:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK9-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !19 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP27]], i64 [[IDXPROM9]] +// CHECK9-NEXT: [[TMP30:%.*]] = load double*, double** [[TMP10]], align 8, !llvm.access.group !19 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP30]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP32:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !19 +// CHECK9-NEXT: [[TMP33:%.*]] = load double*, double** [[TMP12]], align 8, !llvm.access.group !19 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 +// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[TMP33]], i64 [[IDXPROM6]] +// CHECK9-NEXT: [[TMP35:%.*]] = load double, double* [[ARRAYIDX7]], align 8, !llvm.access.group !19 +// CHECK9-NEXT: [[ADD8:%.*]] = fadd double [[TMP32]], [[TMP35]] +// CHECK9-NEXT: [[TMP36:%.*]] = load double*, double** [[TMP8]], align 8, !llvm.access.group !19 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 +// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP36]], i64 [[IDXPROM9]] // CHECK9-NEXT: store double [[ADD8]], double* [[ARRAYIDX10]], align 8, !llvm.access.group !19 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP38]], 1 // CHECK9-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK9: omp.inner.for.end: @@ -6255,6 +6742,7 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double*, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double*, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], i64* [[CH_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store double* [[A]], double** [[A_ADDR]], align 8 @@ -6262,20 +6750,26 @@ // CHECK9-NEXT: store double* [[C]], double** [[C_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[CH_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32* [[CONV]], i32* [[CONV1]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store double** [[A_ADDR]], double*** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store double** [[B_ADDR]], double*** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store double** [[C_ADDR]], double*** [[TMP4]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.11*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -6286,103 +6780,115 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], %struct.anon.11* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**, i64)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i64 [[TMP24]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP4]], i32** [[TMP31]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store double** [[TMP6]], double*** [[TMP32]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store double** [[TMP8]], double*** [[TMP33]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store double** [[TMP10]], double*** [[TMP34]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP36]], i32* [[TMP35]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.12*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) +// CHECK9-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP40]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -6392,94 +6898,99 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 8 +// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32 -// CHECK9-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 +// CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP16]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP30]], [[TMP31]] +// CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !22 -// CHECK9-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !22 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !22 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP22]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !22 -// CHECK9-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !22 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !22 -// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP26]] to i64 -// CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds double, double* [[TMP25]], i64 [[IDXPROM8]] -// CHECK9-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX9]], align 8, !llvm.access.group !22 -// CHECK9-NEXT: [[ADD10:%.*]] = fadd double [[TMP24]], [[TMP27]] -// CHECK9-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !22 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !22 -// CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP28]], i64 [[IDXPROM11]] -// CHECK9-NEXT: store double [[ADD10]], double* [[ARRAYIDX12]], align 8, !llvm.access.group !22 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP33:%.*]] = load double*, double** [[TMP10]], align 8, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP33]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP35:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP36:%.*]] = load double*, double** [[TMP12]], align 8, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP36]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP38:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !22 +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP35]], [[TMP38]] +// CHECK9-NEXT: [[TMP39:%.*]] = load double*, double** [[TMP8]], align 8, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP40]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP39]], i64 [[IDXPROM10]] +// CHECK9-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !22 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP30]], 1 -// CHECK9-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 +// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -7012,24 +7523,30 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK9-NEXT: store i32* [[B]], i32** [[B_ADDR]], align 8 // CHECK9-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32**, i32**, i32**)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32* [[CONV]], i32** [[A_ADDR]], i32** [[B_ADDR]], i32** [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32** [[A_ADDR]], i32*** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32** [[B_ADDR]], i32*** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32** [[C_ADDR]], i32*** [[TMP3]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7039,93 +7556,107 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32**, i32*** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP2]], i32** [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32** [[TMP4]], i32*** [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32** [[TMP6]], i32*** [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store i32** [[TMP8]], i32*** [[TMP31]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.15*)* @.omp_outlined..27 to void (i32*, i32*, ...)*), %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.15* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.15*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7138,111 +7669,115 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.15* [[__CONTEXT]], %struct.anon.15** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.15*, %struct.anon.15** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], %struct.anon.15* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32**, i32*** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB3]], i32 [[TMP21]], i32 2) -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], i32 2) +// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK9: .cancel.exit: // CHECK9-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK9: .cancel.continue: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP24]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP27]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP29]] -// CHECK9-NEXT: [[TMP30:%.*]] = load i32*, i32** [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP31]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[TMP30]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32*, i32** [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP33]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32*, i32** [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP36]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP35]], [[TMP38]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32*, i32** [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP40]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[TMP39]], i64 [[IDXPROM10]] // CHECK9-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX11]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK9-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP43]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: cancel.exit: -// CHECK9-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK9-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP45]]) // CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: omp.precond.end: // CHECK9-NEXT: br label [[CANCEL_CONT]] @@ -7257,24 +7792,30 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK9-NEXT: store i32* [[B]], i32** [[B_ADDR]], align 8 // CHECK9-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32**, i32**, i32**)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* [[CONV]], i32** [[A_ADDR]], i32** [[B_ADDR]], i32** [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32** [[A_ADDR]], i32*** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32** [[B_ADDR]], i32*** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32** [[C_ADDR]], i32*** [[TMP3]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.16*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.16* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.16*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7284,93 +7825,107 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.16* [[__CONTEXT]], %struct.anon.16** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], %struct.anon.16* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32**, i32*** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP2]], i32** [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32** [[TMP4]], i32*** [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32** [[TMP6]], i32*** [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store i32** [[TMP8]], i32*** [[TMP31]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.17*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.17* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.17*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7383,98 +7938,102 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.17* [[__CONTEXT]], %struct.anon.17** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.17*, %struct.anon.17** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], %struct.anon.17* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32**, i32*** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32*, i32** [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32*, i32** [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX11]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -7488,6 +8047,7 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], i64* [[CH_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 @@ -7495,20 +8055,26 @@ // CHECK9-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[CH_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32**, i32**, i32**)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* [[CONV]], i32* [[CONV1]], i32** [[A_ADDR]], i32** [[B_ADDR]], i32** [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32** [[A_ADDR]], i32*** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32** [[B_ADDR]], i32*** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32** [[C_ADDR]], i32*** [[TMP4]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.18*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.18* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.18*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7518,120 +8084,134 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_19:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK9-NEXT: store %struct.anon.18* [[__CONTEXT]], %struct.anon.18** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.18*, %struct.anon.18** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], %struct.anon.18* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32**, i32*** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP4]], i32** [[TMP31]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32** [[TMP6]], i32*** [[TMP32]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32** [[TMP8]], i32*** [[TMP33]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store i32** [[TMP10]], i32*** [[TMP34]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.19*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), %struct.anon.19* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK9-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK9: cond.true10: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END12:%.*]] // CHECK9: cond.false11: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END12]] // CHECK9: cond.end12: -// CHECK9-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE10]] ], [ [[TMP32]], [[COND_FALSE11]] ] +// CHECK9-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE10]] ], [ [[TMP44]], [[COND_FALSE11]] ] // CHECK9-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP45]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.19* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.19*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7644,98 +8224,102 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.19* [[__CONTEXT]], %struct.anon.19** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.19*, %struct.anon.19** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_19:%.*]], %struct.anon.19* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32**, i32*** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32*, i32** [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32*, i32** [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX11]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -7748,24 +8332,30 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK9-NEXT: store i32* [[B]], i32** [[B_ADDR]], align 8 // CHECK9-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32**, i32**, i32**)* @.omp_outlined..38 to void (i32*, i32*, ...)*), i32* [[CONV]], i32** [[A_ADDR]], i32** [[B_ADDR]], i32** [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32** [[A_ADDR]], i32*** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32** [[B_ADDR]], i32*** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32** [[C_ADDR]], i32*** [[TMP3]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.20*)* @.omp_outlined..38 to void (i32*, i32*, ...)*), %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..38 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.20* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.20*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7775,93 +8365,107 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.20* [[__CONTEXT]], %struct.anon.20** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.20*, %struct.anon.20** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], %struct.anon.20* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32**, i32*** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP2]], i32** [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32** [[TMP4]], i32*** [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32** [[TMP6]], i32*** [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store i32** [[TMP8]], i32*** [[TMP31]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.21*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..39 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.21* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.21*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7874,98 +8478,102 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.21* [[__CONTEXT]], %struct.anon.21** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.21*, %struct.anon.21** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], %struct.anon.21* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32**, i32*** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32*, i32** [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32*, i32** [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX11]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -7979,6 +8587,7 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], i64* [[CH_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 @@ -7986,20 +8595,26 @@ // CHECK9-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[CH_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32**, i32**, i32**)* @.omp_outlined..42 to void (i32*, i32*, ...)*), i32* [[CONV]], i32* [[CONV1]], i32** [[A_ADDR]], i32** [[B_ADDR]], i32** [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32** [[A_ADDR]], i32*** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32** [[B_ADDR]], i32*** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32** [[C_ADDR]], i32*** [[TMP4]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.22*)* @.omp_outlined..42 to void (i32*, i32*, ...)*), %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..42 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.22* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.22*, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -8010,103 +8625,115 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_23:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store %struct.anon.22* [[__CONTEXT]], %struct.anon.22** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.22*, %struct.anon.22** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], %struct.anon.22* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32**, i32*** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**, i64)* @.omp_outlined..43 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]], i64 [[TMP24]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP4]], i32** [[TMP31]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32** [[TMP6]], i32*** [[TMP32]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32** [[TMP8]], i32*** [[TMP33]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store i32** [[TMP10]], i32*** [[TMP34]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP36]], i32* [[TMP35]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.23*)* @.omp_outlined..43 to void (i32*, i32*, ...)*), %struct.anon.23* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) +// CHECK9-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP40]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..43 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.23* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.23*, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -8116,123 +8743,128 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.23* [[__CONTEXT]], %struct.anon.23** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.23*, %struct.anon.23** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_23:%.*]], %struct.anon.23* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32**, i32*** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 8 +// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32 -// CHECK9-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 +// CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP23]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP25]] to i32 +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP24]], [[CONV6]] +// CHECK9-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP26]] to i32 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK9-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] +// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK9-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM12]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX13]], align 4 -// CHECK9-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK9-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK9-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[IDXPROM15]] -// CHECK9-NEXT: store i32 [[ADD14]], i32* [[ARRAYIDX16]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32*, i32** [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP34]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32*, i32** [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP38]] to i64 +// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[TMP37]], i64 [[IDXPROM11]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[ARRAYIDX12]], align 4 +// CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP36]], [[TMP39]] +// CHECK9-NEXT: [[TMP40:%.*]] = load i32*, i32** [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK9-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP41]] to i64 +// CHECK9-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[TMP40]], i64 [[IDXPROM14]] +// CHECK9-NEXT: store i32 [[ADD13]], i32* [[ARRAYIDX15]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK9-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP42]], 1 +// CHECK9-NEXT: store i32 [[ADD16]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] -// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK9-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK9-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] +// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP36]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP37]]) +// CHECK9-NEXT: [[TMP47:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP48:%.*]] = load i32, i32* [[TMP47]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP48]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -8245,24 +8877,30 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_25:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK9-NEXT: store i32* [[B]], i32** [[B_ADDR]], align 8 // CHECK9-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32**, i32**, i32**)* @.omp_outlined..46 to void (i32*, i32*, ...)*), i32* [[CONV]], i32** [[A_ADDR]], i32** [[B_ADDR]], i32** [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32** [[A_ADDR]], i32*** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32** [[B_ADDR]], i32*** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32** [[C_ADDR]], i32*** [[TMP3]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.25*)* @.omp_outlined..46 to void (i32*, i32*, ...)*), %struct.anon.25* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..46 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.25* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.25*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8272,93 +8910,107 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_26:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.25* [[__CONTEXT]], %struct.anon.25** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.25*, %struct.anon.25** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_25:%.*]], %struct.anon.25* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32**, i32*** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**)* @.omp_outlined..47 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP2]], i32** [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32** [[TMP4]], i32*** [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32** [[TMP6]], i32*** [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store i32** [[TMP8]], i32*** [[TMP31]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.26*)* @.omp_outlined..47 to void (i32*, i32*, ...)*), %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..47 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.26* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.26*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8371,87 +9023,91 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.26* [[__CONTEXT]], %struct.anon.26** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.26*, %struct.anon.26** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_26:%.*]], %struct.anon.26* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32**, i32*** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP24]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !25 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32*, i32** [[TMP2]], align 8, !llvm.access.group !25 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !25 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP21]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !25 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !25 -// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP25]] to i64 -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[TMP24]], i64 [[IDXPROM6]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4, !llvm.access.group !25 -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP26]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !25 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !25 -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[TMP27]], i64 [[IDXPROM9]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32*, i32** [[TMP10]], align 8, !llvm.access.group !25 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !25 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP30]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32*, i32** [[TMP12]], align 8, !llvm.access.group !25 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !25 +// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[TMP33]], i64 [[IDXPROM6]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4, !llvm.access.group !25 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP32]], [[TMP35]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i32*, i32** [[TMP8]], align 8, !llvm.access.group !25 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !25 +// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[TMP36]], i64 [[IDXPROM9]] // CHECK9-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX10]], align 4, !llvm.access.group !25 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP38]], 1 // CHECK9-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK9: omp.inner.for.end: @@ -8472,6 +9128,7 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_27:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], i64* [[CH_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 @@ -8479,20 +9136,26 @@ // CHECK9-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[CH_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32**, i32**, i32**)* @.omp_outlined..50 to void (i32*, i32*, ...)*), i32* [[CONV]], i32* [[CONV1]], i32** [[A_ADDR]], i32** [[B_ADDR]], i32** [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], %struct.anon.27* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], %struct.anon.27* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], %struct.anon.27* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32** [[A_ADDR]], i32*** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], %struct.anon.27* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32** [[B_ADDR]], i32*** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], %struct.anon.27* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32** [[C_ADDR]], i32*** [[TMP4]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.27*)* @.omp_outlined..50 to void (i32*, i32*, ...)*), %struct.anon.27* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..50 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.27* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.27*, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -8503,103 +9166,115 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_28:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store %struct.anon.27* [[__CONTEXT]], %struct.anon.27** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.27*, %struct.anon.27** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_27:%.*]], %struct.anon.27* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], %struct.anon.27* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], %struct.anon.27* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32**, i32*** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], %struct.anon.27* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], %struct.anon.27* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**, i64)* @.omp_outlined..51 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]], i64 [[TMP24]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], %struct.anon.28* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], %struct.anon.28* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], %struct.anon.28* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP4]], i32** [[TMP31]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], %struct.anon.28* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32** [[TMP6]], i32*** [[TMP32]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], %struct.anon.28* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32** [[TMP8]], i32*** [[TMP33]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], %struct.anon.28* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store i32** [[TMP10]], i32*** [[TMP34]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], %struct.anon.28* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP36]], i32* [[TMP35]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.28*)* @.omp_outlined..51 to void (i32*, i32*, ...)*), %struct.anon.28* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) +// CHECK9-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP40]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..51 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.28* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.28*, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -8609,94 +9284,99 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.28* [[__CONTEXT]], %struct.anon.28** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.28*, %struct.anon.28** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_28:%.*]], %struct.anon.28* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], %struct.anon.28* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], %struct.anon.28* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], %struct.anon.28* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], %struct.anon.28* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], %struct.anon.28* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32**, i32*** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], %struct.anon.28* [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 8 +// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32 -// CHECK9-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 +// CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP16]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP30]], [[TMP31]] +// CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !28 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32*, i32** [[TMP2]], align 8, !llvm.access.group !28 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !28 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP22]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !28 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !28 -// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP26]] to i64 -// CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[TMP25]], i64 [[IDXPROM8]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4, !llvm.access.group !28 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP27]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !28 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !28 -// CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[TMP28]], i64 [[IDXPROM11]] -// CHECK9-NEXT: store i32 [[ADD10]], i32* [[ARRAYIDX12]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32*, i32** [[TMP10]], align 8, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP33]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32*, i32** [[TMP12]], align 8, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP36]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP35]], [[TMP38]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32*, i32** [[TMP8]], align 8, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP40]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[TMP39]], i64 [[IDXPROM10]] +// CHECK9-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX11]], align 4, !llvm.access.group !28 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP30]], 1 -// CHECK9-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 +// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -9229,23 +9909,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double*, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double*, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store double* [[A]], double** [[A_ADDR]], align 4 // CHECK11-NEXT: store double* [[B]], double** [[B_ADDR]], align 4 // CHECK11-NEXT: store double* [[C]], double** [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[N_ADDR]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store double** [[A_ADDR]], double*** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store double** [[B_ADDR]], double*** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store double** [[C_ADDR]], double*** [[TMP3]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9255,91 +9941,105 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load double**, double*** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store double** [[TMP4]], double*** [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store double** [[TMP6]], double*** [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store double** [[TMP8]], double*** [[TMP29]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9352,93 +10052,97 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load double, double* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load double*, double** [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load double, double* [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load double*, double** [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -9451,23 +10155,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double*, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double*, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store double* [[A]], double** [[A_ADDR]], align 4 // CHECK11-NEXT: store double* [[B]], double** [[B_ADDR]], align 4 // CHECK11-NEXT: store double* [[C]], double** [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store double** [[A_ADDR]], double*** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store double** [[B_ADDR]], double*** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store double** [[C_ADDR]], double*** [[TMP3]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9477,91 +10187,105 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load double**, double*** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store double** [[TMP4]], double*** [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store double** [[TMP6]], double*** [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store double** [[TMP8]], double*** [[TMP29]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9574,93 +10298,97 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load double, double* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load double*, double** [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load double, double* [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load double*, double** [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -9674,25 +10402,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double*, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double*, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], i32* [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store double* [[A]], double** [[A_ADDR]], align 4 // CHECK11-NEXT: store double* [[B]], double** [[B_ADDR]], align 4 // CHECK11-NEXT: store double* [[C]], double** [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[CH_ADDR]], i32* [[N_ADDR]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[CH_ADDR]], i32** [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store double** [[A_ADDR]], double*** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store double** [[B_ADDR]], double*** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store double** [[C_ADDR]], double*** [[TMP4]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9702,118 +10437,132 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK11-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP4]], i32** [[TMP29]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store double** [[TMP6]], double*** [[TMP30]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store double** [[TMP8]], double*** [[TMP31]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store double** [[TMP10]], double*** [[TMP32]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] // CHECK11-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK11: cond.true10: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END12:%.*]] // CHECK11: cond.false11: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END12]] // CHECK11: cond.end12: -// CHECK11-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE10]] ], [ [[TMP30]], [[COND_FALSE11]] ] +// CHECK11-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE10]] ], [ [[TMP42]], [[COND_FALSE11]] ] // CHECK11-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP43]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP45]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9826,93 +10575,97 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load double, double* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load double*, double** [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load double, double* [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load double*, double** [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -9925,23 +10678,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double*, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double*, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store double* [[A]], double** [[A_ADDR]], align 4 // CHECK11-NEXT: store double* [[B]], double** [[B_ADDR]], align 4 // CHECK11-NEXT: store double* [[C]], double** [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store double** [[A_ADDR]], double*** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store double** [[B_ADDR]], double*** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store double** [[C_ADDR]], double*** [[TMP3]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9951,91 +10710,105 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load double**, double*** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store double** [[TMP4]], double*** [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store double** [[TMP6]], double*** [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store double** [[TMP8]], double*** [[TMP29]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10048,93 +10821,97 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load double, double* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load double*, double** [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load double, double* [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load double*, double** [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -10148,25 +10925,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double*, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double*, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], i32* [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store double* [[A]], double** [[A_ADDR]], align 4 // CHECK11-NEXT: store double* [[B]], double** [[B_ADDR]], align 4 // CHECK11-NEXT: store double* [[C]], double** [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* [[CH_ADDR]], i32* [[N_ADDR]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[CH_ADDR]], i32** [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store double** [[A_ADDR]], double*** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store double** [[B_ADDR]], double*** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store double** [[C_ADDR]], double*** [[TMP4]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -10177,100 +10961,113 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP4]], i32** [[TMP29]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store double** [[TMP6]], double*** [[TMP30]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store double** [[TMP8]], double*** [[TMP31]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store double** [[TMP10]], double*** [[TMP32]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP34]], i32* [[TMP33]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) +// CHECK11-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -10283,112 +11080,118 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP23]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] -// CHECK11-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX8]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = fadd double [[TMP25]], [[TMP28]] -// CHECK11-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP34:%.*]] = load double*, double** [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP34]], i32 [[TMP35]] +// CHECK11-NEXT: [[TMP36:%.*]] = load double, double* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = load double*, double** [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP37]], i32 [[TMP38]] +// CHECK11-NEXT: [[TMP39:%.*]] = load double, double* [[ARRAYIDX8]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = fadd double [[TMP36]], [[TMP39]] +// CHECK11-NEXT: [[TMP40:%.*]] = load double*, double** [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP40]], i32 [[TMP41]] // CHECK11-NEXT: store double [[ADD9]], double* [[ARRAYIDX10]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP42]], 1 // CHECK11-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] // CHECK11-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] // CHECK11-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP36]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP37]]) +// CHECK11-NEXT: [[TMP47:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, i32* [[TMP47]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP48]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -10401,23 +11204,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double*, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double*, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store double* [[A]], double** [[A_ADDR]], align 4 // CHECK11-NEXT: store double* [[B]], double** [[B_ADDR]], align 4 // CHECK11-NEXT: store double* [[C]], double** [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store double** [[A_ADDR]], double*** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store double** [[B_ADDR]], double*** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store double** [[C_ADDR]], double*** [[TMP3]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10427,91 +11236,105 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load double**, double*** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store double** [[TMP4]], double*** [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store double** [[TMP6]], double*** [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store double** [[TMP8]], double*** [[TMP29]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10524,82 +11347,86 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP24]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !20 -// CHECK11-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !20 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !20 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP21]], i32 [[TMP22]] -// CHECK11-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !20 -// CHECK11-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !20 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !20 -// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX5]], align 4, !llvm.access.group !20 -// CHECK11-NEXT: [[ADD6:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !20 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !20 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[TMP27]], i32 [[TMP28]] +// CHECK11-NEXT: [[TMP30:%.*]] = load double*, double** [[TMP10]], align 4, !llvm.access.group !20 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !20 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP30]], i32 [[TMP31]] +// CHECK11-NEXT: [[TMP32:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !20 +// CHECK11-NEXT: [[TMP33:%.*]] = load double*, double** [[TMP12]], align 4, !llvm.access.group !20 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !20 +// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[TMP33]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load double, double* [[ARRAYIDX5]], align 4, !llvm.access.group !20 +// CHECK11-NEXT: [[ADD6:%.*]] = fadd double [[TMP32]], [[TMP35]] +// CHECK11-NEXT: [[TMP36:%.*]] = load double*, double** [[TMP8]], align 4, !llvm.access.group !20 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !20 +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[TMP36]], i32 [[TMP37]] // CHECK11-NEXT: store double [[ADD6]], double* [[ARRAYIDX7]], align 4, !llvm.access.group !20 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP38]], 1 // CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK11: omp.inner.for.end: @@ -10620,25 +11447,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double*, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double*, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], i32* [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store double* [[A]], double** [[A_ADDR]], align 4 // CHECK11-NEXT: store double* [[B]], double** [[B_ADDR]], align 4 // CHECK11-NEXT: store double* [[C]], double** [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32* [[CH_ADDR]], i32* [[N_ADDR]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[CH_ADDR]], i32** [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store double** [[A_ADDR]], double*** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store double** [[B_ADDR]], double*** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store double** [[C_ADDR]], double*** [[TMP4]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.11*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -10649,100 +11483,113 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], %struct.anon.11* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**, i32)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP4]], i32** [[TMP29]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store double** [[TMP6]], double*** [[TMP30]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store double** [[TMP8]], double*** [[TMP31]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store double** [[TMP10]], double*** [[TMP32]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP34]], i32* [[TMP33]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.12*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) +// CHECK11-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -10755,84 +11602,90 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP16]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP29]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !23 -// CHECK11-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !23 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !23 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP22]], i32 [[TMP23]] -// CHECK11-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !23 -// CHECK11-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !23 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !23 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP25]], i32 [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !23 -// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP24]], [[TMP27]] -// CHECK11-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !23 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !23 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP28]], i32 [[TMP29]] +// CHECK11-NEXT: [[TMP33:%.*]] = load double*, double** [[TMP10]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP33]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: [[TMP36:%.*]] = load double*, double** [[TMP12]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP36]], i32 [[TMP37]] +// CHECK11-NEXT: [[TMP38:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP35]], [[TMP38]] +// CHECK11-NEXT: [[TMP39:%.*]] = load double*, double** [[TMP8]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP39]], i32 [[TMP40]] // CHECK11-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !23 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK11: omp.inner.for.end: @@ -11356,23 +12209,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: store i32* [[B]], i32** [[B_ADDR]], align 4 // CHECK11-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32**, i32**, i32**)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32** [[A_ADDR]], i32** [[B_ADDR]], i32** [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32** [[A_ADDR]], i32*** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32** [[B_ADDR]], i32*** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32** [[C_ADDR]], i32*** [[TMP3]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.13*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11382,91 +12241,105 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32**, i32*** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32** [[TMP4]], i32*** [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32** [[TMP6]], i32*** [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32** [[TMP8]], i32*** [[TMP29]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..27 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11479,106 +12352,110 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32**, i32*** [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB3]], i32 [[TMP21]], i32 2) -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], i32 2) +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK11: .cancel.exit: // CHECK11-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK11: .cancel.continue: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP24]], i32 [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[TMP27]], i32 [[TMP28]] -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP29]] -// CHECK11-NEXT: [[TMP30:%.*]] = load i32*, i32** [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP30]], i32 [[TMP31]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32*, i32** [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP33]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32*, i32** [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[TMP36]], i32 [[TMP37]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP38]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32*, i32** [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP39]], i32 [[TMP40]] // CHECK11-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX8]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK11-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP43]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: cancel.exit: -// CHECK11-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK11-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP45]]) // CHECK11-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK11: omp.precond.end: // CHECK11-NEXT: br label [[CANCEL_CONT]] @@ -11593,23 +12470,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: store i32* [[B]], i32** [[B_ADDR]], align 4 // CHECK11-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32**, i32**, i32**)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32** [[A_ADDR]], i32** [[B_ADDR]], i32** [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32** [[A_ADDR]], i32*** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32** [[B_ADDR]], i32*** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32** [[C_ADDR]], i32*** [[TMP3]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.15*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.15* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.15*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11619,91 +12502,105 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.15* [[__CONTEXT]], %struct.anon.15** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.15*, %struct.anon.15** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], %struct.anon.15* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32**, i32*** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32** [[TMP4]], i32*** [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32** [[TMP6]], i32*** [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32** [[TMP8]], i32*** [[TMP29]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.16*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.16* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.16*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11716,93 +12613,97 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.16* [[__CONTEXT]], %struct.anon.16** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], %struct.anon.16* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32**, i32*** [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32*, i32** [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX8]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -11816,25 +12717,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], i32* [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: store i32* [[B]], i32** [[B_ADDR]], align 4 // CHECK11-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32**, i32**, i32**)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* [[CH_ADDR]], i32* [[N_ADDR]], i32** [[A_ADDR]], i32** [[B_ADDR]], i32** [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[CH_ADDR]], i32** [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32** [[A_ADDR]], i32*** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32** [[B_ADDR]], i32*** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32** [[C_ADDR]], i32*** [[TMP4]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.17*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.17* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.17*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11844,118 +12752,132 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK11-NEXT: store %struct.anon.17* [[__CONTEXT]], %struct.anon.17** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.17*, %struct.anon.17** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], %struct.anon.17* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32**, i32*** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP4]], i32** [[TMP29]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32** [[TMP6]], i32*** [[TMP30]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32** [[TMP8]], i32*** [[TMP31]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32** [[TMP10]], i32*** [[TMP32]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.18*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] // CHECK11-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK11: cond.true10: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END12:%.*]] // CHECK11: cond.false11: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END12]] // CHECK11: cond.end12: -// CHECK11-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE10]] ], [ [[TMP30]], [[COND_FALSE11]] ] +// CHECK11-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE10]] ], [ [[TMP42]], [[COND_FALSE11]] ] // CHECK11-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP43]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP45]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.18* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.18*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11968,93 +12890,97 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.18* [[__CONTEXT]], %struct.anon.18** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.18*, %struct.anon.18** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], %struct.anon.18* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32**, i32*** [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32*, i32** [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX8]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -12067,23 +12993,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_19:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: store i32* [[B]], i32** [[B_ADDR]], align 4 // CHECK11-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32**, i32**, i32**)* @.omp_outlined..38 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32** [[A_ADDR]], i32** [[B_ADDR]], i32** [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32** [[A_ADDR]], i32*** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32** [[B_ADDR]], i32*** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32** [[C_ADDR]], i32*** [[TMP3]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.19*)* @.omp_outlined..38 to void (i32*, i32*, ...)*), %struct.anon.19* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..38 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.19* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.19*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12093,91 +13025,105 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.19* [[__CONTEXT]], %struct.anon.19** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.19*, %struct.anon.19** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_19:%.*]], %struct.anon.19* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32**, i32*** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32** [[TMP4]], i32*** [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32** [[TMP6]], i32*** [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32** [[TMP8]], i32*** [[TMP29]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.20*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..39 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.20* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.20*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12190,93 +13136,97 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.20* [[__CONTEXT]], %struct.anon.20** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.20*, %struct.anon.20** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], %struct.anon.20* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32**, i32*** [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32*, i32** [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX8]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -12290,25 +13240,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], i32* [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: store i32* [[B]], i32** [[B_ADDR]], align 4 // CHECK11-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32**, i32**, i32**)* @.omp_outlined..42 to void (i32*, i32*, ...)*), i32* [[CH_ADDR]], i32* [[N_ADDR]], i32** [[A_ADDR]], i32** [[B_ADDR]], i32** [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[CH_ADDR]], i32** [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32** [[A_ADDR]], i32*** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32** [[B_ADDR]], i32*** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32** [[C_ADDR]], i32*** [[TMP4]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.21*)* @.omp_outlined..42 to void (i32*, i32*, ...)*), %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..42 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.21* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.21*, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -12319,100 +13276,113 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store %struct.anon.21* [[__CONTEXT]], %struct.anon.21** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.21*, %struct.anon.21** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], %struct.anon.21* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32**, i32*** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**, i32)* @.omp_outlined..43 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP4]], i32** [[TMP29]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32** [[TMP6]], i32*** [[TMP30]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32** [[TMP8]], i32*** [[TMP31]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32** [[TMP10]], i32*** [[TMP32]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP34]], i32* [[TMP33]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.22*)* @.omp_outlined..43 to void (i32*, i32*, ...)*), %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) +// CHECK11-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..43 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.22* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.22*, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -12425,112 +13395,118 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.22* [[__CONTEXT]], %struct.anon.22** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.22*, %struct.anon.22** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], %struct.anon.22* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32**, i32*** [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP23]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i32 [[TMP27]] -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] -// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32*, i32** [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP34]], i32 [[TMP35]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32*, i32** [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP37]], i32 [[TMP38]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP36]], [[TMP39]] +// CHECK11-NEXT: [[TMP40:%.*]] = load i32*, i32** [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[TMP40]], i32 [[TMP41]] // CHECK11-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX10]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP42]], 1 // CHECK11-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] // CHECK11-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] // CHECK11-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP36]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP37]]) +// CHECK11-NEXT: [[TMP47:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, i32* [[TMP47]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP48]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -12543,23 +13519,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_23:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: store i32* [[B]], i32** [[B_ADDR]], align 4 // CHECK11-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32**, i32**, i32**)* @.omp_outlined..46 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32** [[A_ADDR]], i32** [[B_ADDR]], i32** [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32** [[A_ADDR]], i32*** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32** [[B_ADDR]], i32*** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32** [[C_ADDR]], i32*** [[TMP3]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.23*)* @.omp_outlined..46 to void (i32*, i32*, ...)*), %struct.anon.23* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..46 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.23* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.23*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12569,91 +13551,105 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.23* [[__CONTEXT]], %struct.anon.23** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.23*, %struct.anon.23** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_23:%.*]], %struct.anon.23* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32**, i32*** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**)* @.omp_outlined..47 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32** [[TMP4]], i32*** [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32** [[TMP6]], i32*** [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32** [[TMP8]], i32*** [[TMP29]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.24*)* @.omp_outlined..47 to void (i32*, i32*, ...)*), %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..47 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.24* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.24*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12666,82 +13662,86 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.24* [[__CONTEXT]], %struct.anon.24** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.24*, %struct.anon.24** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_24:%.*]], %struct.anon.24* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32**, i32*** [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP24]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !26 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[TMP2]], align 4, !llvm.access.group !26 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !26 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP21]], i32 [[TMP22]] -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP3]], align 4, !llvm.access.group !26 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !26 -// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[TMP24]], i32 [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4, !llvm.access.group !26 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP1]], align 4, !llvm.access.group !26 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !26 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[TMP27]], i32 [[TMP28]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32*, i32** [[TMP10]], align 4, !llvm.access.group !26 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !26 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP30]], i32 [[TMP31]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32*, i32** [[TMP12]], align 4, !llvm.access.group !26 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !26 +// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[TMP33]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4, !llvm.access.group !26 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], [[TMP35]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32*, i32** [[TMP8]], align 4, !llvm.access.group !26 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !26 +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[TMP36]], i32 [[TMP37]] // CHECK11-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX7]], align 4, !llvm.access.group !26 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP38]], 1 // CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK11: omp.inner.for.end: @@ -12762,25 +13762,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_25:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], i32* [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: store i32* [[B]], i32** [[B_ADDR]], align 4 // CHECK11-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32**, i32**, i32**)* @.omp_outlined..50 to void (i32*, i32*, ...)*), i32* [[CH_ADDR]], i32* [[N_ADDR]], i32** [[A_ADDR]], i32** [[B_ADDR]], i32** [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[CH_ADDR]], i32** [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32** [[A_ADDR]], i32*** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32** [[B_ADDR]], i32*** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32** [[C_ADDR]], i32*** [[TMP4]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.25*)* @.omp_outlined..50 to void (i32*, i32*, ...)*), %struct.anon.25* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..50 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.25* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.25*, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -12791,100 +13798,113 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_26:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store %struct.anon.25* [[__CONTEXT]], %struct.anon.25** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.25*, %struct.anon.25** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_25:%.*]], %struct.anon.25* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32**, i32*** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**, i32)* @.omp_outlined..51 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP4]], i32** [[TMP29]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32** [[TMP6]], i32*** [[TMP30]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32** [[TMP8]], i32*** [[TMP31]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32** [[TMP10]], i32*** [[TMP32]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP34]], i32* [[TMP33]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.26*)* @.omp_outlined..51 to void (i32*, i32*, ...)*), %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) +// CHECK11-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..51 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.26* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.26*, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -12897,84 +13917,90 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.26* [[__CONTEXT]], %struct.anon.26** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.26*, %struct.anon.26** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_26:%.*]], %struct.anon.26* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32**, i32*** [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP16]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP29]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !29 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32*, i32** [[TMP2]], align 4, !llvm.access.group !29 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !29 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP22]], i32 [[TMP23]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !29 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32*, i32** [[TMP3]], align 4, !llvm.access.group !29 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !29 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[TMP25]], i32 [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, !llvm.access.group !29 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], [[TMP27]] -// CHECK11-NEXT: [[TMP28:%.*]] = load i32*, i32** [[TMP1]], align 4, !llvm.access.group !29 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !29 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP28]], i32 [[TMP29]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32*, i32** [[TMP10]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP33]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32*, i32** [[TMP12]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[TMP36]], i32 [[TMP37]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP38]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32*, i32** [[TMP8]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP39]], i32 [[TMP40]] // CHECK11-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX8]], align 4, !llvm.access.group !29 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK11: omp.inner.for.end: diff --git a/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp @@ -289,6 +289,7 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], i64* [[SVAR_ADDR]], align 8 @@ -298,20 +299,25 @@ // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SFVAR_ADDR]] to float* // CHECK1-NEXT: store double* [[CONV1]], double** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, double*, double*, i32*, float*)* @.omp_outlined. to void (i32*, i32*, ...)*), double* [[CONV]], double* [[TMP0]], i32* [[CONV2]], float* [[CONV3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store double* [[CONV]], double** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[TMP2]], double** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[CONV2]], i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[CONV3]], float** [[TMP4]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[G:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -320,199 +326,209 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store double* [[G]], double** [[G_ADDR]], align 8 -// CHECK1-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store float* [[SFVAR]], float** [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float*, float** [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store double* [[TMP1]], double** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: store double* [[TMP4]], double** [[_TMP1]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 8 +// CHECK1-NEXT: store double* [[TMP4]], double** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[TMP9]], double** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load double, double* [[TMP0]], align 8 -// CHECK1-NEXT: store double [[TMP5]], double* [[G3]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load double, double* [[TMP6]], align 8 -// CHECK1-NEXT: store double [[TMP7]], double* [[G14]], align 8 -// CHECK1-NEXT: store double* [[G14]], double** [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[SVAR6]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load float, float* [[TMP3]], align 4 -// CHECK1-NEXT: store float [[TMP9]], float* [[SFVAR7]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load double, double* [[TMP2]], align 8 +// CHECK1-NEXT: store double [[TMP10]], double* [[G]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load double*, double** [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load double, double* [[TMP11]], align 8 +// CHECK1-NEXT: store double [[TMP12]], double* [[G1]], align 8 +// CHECK1-NEXT: store double* [[G1]], double** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load float, float* [[TMP8]], align 4 +// CHECK1-NEXT: store float [[TMP14]], float* [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = load double, double* [[G3]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[G_CASTED]] to double* -// CHECK1-NEXT: store double [[TMP21]], double* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load double*, double** [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load volatile double, double* [[TMP23]], align 8 -// CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[G1_CASTED]] to double* -// CHECK1-NEXT: store double [[TMP24]], double* [[CONV9]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[SVAR6]], align 4 -// CHECK1-NEXT: [[CONV10:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP26]], i32* [[CONV10]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load float, float* [[SFVAR7]], align 4 -// CHECK1-NEXT: [[CONV11:%.*]] = bitcast i64* [[SFVAR_CASTED]] to float* -// CHECK1-NEXT: store float [[TMP28]], float* [[CONV11]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP25]], i64 [[TMP27]], i64 [[TMP29]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP29:%.*]] = load double, double* [[G]], align 8 +// CHECK1-NEXT: store double [[TMP29]], double* [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP31:%.*]] = load double*, double** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load volatile double, double* [[TMP31]], align 8 +// CHECK1-NEXT: store double [[TMP32]], double* [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP34]], i32* [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP36:%.*]] = load float, float* [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP36]], float* [[TMP35]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SVAR:%.*]], i64 noundef [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SVAR]], i64* [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SFVAR]], i64* [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[G_ADDR]] to double* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to double* -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SFVAR_ADDR]] to float* -// CHECK1-NEXT: store double* [[CONV1]], double** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load double, double* [[TMP5]], align 8 +// CHECK1-NEXT: store double [[TMP6]], double* [[G]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double, double* [[TMP7]], align 8 +// CHECK1-NEXT: store double [[TMP8]], double* [[G1]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load float, float* [[TMP11]], align 4 +// CHECK1-NEXT: store float [[TMP12]], float* [[SFVAR]], align 4 +// CHECK1-NEXT: store double* [[G1]], double** [[TMP]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: store double 1.000000e+00, double* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP10]], align 8 -// CHECK1-NEXT: store i32 3, i32* [[CONV2]], align 4 -// CHECK1-NEXT: store float 4.000000e+00, float* [[CONV3]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double* [[CONV]], double** [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: store double* [[TMP13]], double** [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store float* [[CONV3]], float** [[TMP15]], align 8 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) +// CHECK1-NEXT: store double 1.000000e+00, double* [[G]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP23]], align 8 +// CHECK1-NEXT: store i32 3, i32* [[SVAR]], align 4 +// CHECK1-NEXT: store float 4.000000e+00, float* [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store double* [[G]], double** [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[TMP26]], double** [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[SVAR]], i32** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[SFVAR]], float** [[TMP28]], align 8 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]]) // CHECK1-NEXT: ret void // // @@ -552,6 +568,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 // CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], i32* [[SVAR_ADDR]], align 4 @@ -566,20 +583,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load volatile double, double* [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], double* [[G13]], align 8 // CHECK3-NEXT: store double* [[G13]], double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, double*, double*, i32*, float*)* @.omp_outlined. to void (i32*, i32*, ...)*), double* [[G2]], double* [[TMP5]], i32* [[SVAR_ADDR]], float* [[CONV]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G2]], double** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load double*, double** [[_TMP4]], align 4 +// CHECK3-NEXT: store double* [[TMP7]], double** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SVAR_ADDR]], i32** [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[CONV]], float** [[TMP9]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca float*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -588,191 +610,205 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP5:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP3:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G1_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 -// CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store float* [[SFVAR]], float** [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load float*, float** [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store double* [[TMP1]], double** [[TMP]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP]], align 4 -// CHECK3-NEXT: store double* [[TMP4]], double** [[_TMP1]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 4 +// CHECK3-NEXT: store double* [[TMP4]], double** [[TMP]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load double*, double** [[TMP]], align 4 +// CHECK3-NEXT: store double* [[TMP9]], double** [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load double, double* [[TMP0]], align 8 -// CHECK3-NEXT: store double [[TMP5]], double* [[G3]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = load double*, double** [[_TMP1]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load double, double* [[TMP6]], align 4 -// CHECK3-NEXT: store double [[TMP7]], double* [[G14]], align 8 -// CHECK3-NEXT: store double* [[G14]], double** [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[SVAR6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load float, float* [[TMP3]], align 4 -// CHECK3-NEXT: store float [[TMP9]], float* [[SFVAR7]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load double, double* [[TMP2]], align 8 +// CHECK3-NEXT: store double [[TMP10]], double* [[G]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load double*, double** [[_TMP1]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load double, double* [[TMP11]], align 4 +// CHECK3-NEXT: store double [[TMP12]], double* [[G1]], align 8 +// CHECK3-NEXT: store double* [[G1]], double** [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load float, float* [[TMP8]], align 4 +// CHECK3-NEXT: store float [[TMP14]], float* [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load double*, double** [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load volatile double, double* [[TMP19]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[G1_CASTED]] to double* -// CHECK3-NEXT: store double [[TMP20]], double* [[CONV]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[G1_CASTED]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[SVAR6]], align 4 -// CHECK3-NEXT: store i32 [[TMP22]], i32* [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load float, float* [[SFVAR7]], align 4 -// CHECK3-NEXT: [[CONV9:%.*]] = bitcast i32* [[SFVAR_CASTED]] to float* -// CHECK3-NEXT: store float [[TMP24]], float* [[CONV9]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[SFVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, double*, i32, i32, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], double* [[G3]], i32 [[TMP21]], i32 [[TMP23]], i32 [[TMP25]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store double* [[G]], double** [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP28:%.*]] = load double*, double** [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load volatile double, double* [[TMP28]], align 4 +// CHECK3-NEXT: store double [[TMP29]], double* [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP31]], i32* [[TMP30]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP33:%.*]] = load float, float* [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP33]], float* [[TMP32]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G:%.*]], i32 noundef [[G1:%.*]], i32 noundef [[SVAR:%.*]], i32 noundef [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G3:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[G1]], i32* [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SVAR]], i32* [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SFVAR]], i32* [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[G1_ADDR]] to double* -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[SFVAR_ADDR]] to float* -// CHECK3-NEXT: store double* [[CONV]], double** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load double*, double** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double, double* [[TMP7]], align 4 +// CHECK3-NEXT: store double [[TMP8]], double* [[G1]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load float, float* [[TMP11]], align 4 +// CHECK3-NEXT: store float [[TMP12]], float* [[SFVAR]], align 4 +// CHECK3-NEXT: store double* [[G1]], double** [[TMP]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load double, double* [[TMP0]], align 8 -// CHECK3-NEXT: store double [[TMP3]], double* [[G3]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load double, double* [[TMP6]], align 8 +// CHECK3-NEXT: store double [[TMP15]], double* [[G]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: store double 1.000000e+00, double* [[G3]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = load double*, double** [[TMP]], align 4 -// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP12]], align 4 -// CHECK3-NEXT: store i32 3, i32* [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store float 4.000000e+00, float* [[CONV1]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double* [[G3]], double** [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP15:%.*]] = load double*, double** [[TMP]], align 4 -// CHECK3-NEXT: store double* [[TMP15]], double** [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store i32* [[SVAR_ADDR]], i32** [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store float* [[CONV1]], float** [[TMP17]], align 4 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) +// CHECK3-NEXT: store double 1.000000e+00, double* [[G]], align 8 +// CHECK3-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP]], align 4 +// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP24]], align 4 +// CHECK3-NEXT: store i32 3, i32* [[SVAR]], align 4 +// CHECK3-NEXT: store float 4.000000e+00, float* [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G]], double** [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP]], align 4 +// CHECK3-NEXT: store double* [[TMP27]], double** [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SVAR]], i32** [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[SFVAR]], float** [[TMP29]], align 4 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) // CHECK3-NEXT: ret void // // @@ -924,6 +960,7 @@ // CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 // CHECK8-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK8-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 +// CHECK8-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK8-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK8-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK8-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 @@ -935,21 +972,27 @@ // CHECK8-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK8-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK8-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 -// CHECK8-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]], [2 x i32]* [[TMP0]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP3]], i32* [[CONV1]]) +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK8-NEXT: store i32* [[CONV]], i32** [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK8-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP4]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK8-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP5]], align 8 +// CHECK8-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK8-NEXT: store %struct.S* [[TMP7]], %struct.S** [[TMP6]], align 8 +// CHECK8-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK8-NEXT: store i32* [[CONV1]], i32** [[TMP8]], align 8 +// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK8-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK8-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK8-NEXT: entry: // CHECK8-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK8-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK8-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK8-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK8-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK8-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK8-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK8-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 // CHECK8-NEXT: [[_TMP1:%.*]] = alloca %struct.S*, align 8 // CHECK8-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -958,267 +1001,282 @@ // CHECK8-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK8-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK8-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK8-NEXT: [[_TMP8:%.*]] = alloca %struct.S*, align 8 -// CHECK8-NEXT: [[SVAR9:%.*]] = alloca i32, align 4 +// CHECK8-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK8-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK8-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK8-NEXT: [[_TMP4:%.*]] = alloca %struct.S*, align 8 +// CHECK8-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK8-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK8-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK8-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK8-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK8-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK8-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 8 -// CHECK8-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP]], align 8 -// CHECK8-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK8-NEXT: store %struct.S* [[TMP5]], %struct.S** [[_TMP1]], align 8 +// CHECK8-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK8-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK8-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK8-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK8-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK8-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP]], align 8 +// CHECK8-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK8-NEXT: store %struct.S* [[TMP11]], %struct.S** [[_TMP1]], align 8 // CHECK8-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK8-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK8-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK8-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK8-NEXT: store i32 [[TMP6]], i32* [[T_VAR3]], align 4 -// CHECK8-NEXT: [[TMP7:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK8-NEXT: [[TMP8:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP7]], i8* align 4 [[TMP8]], i64 8, i1 false) -// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP9:%.*]] = bitcast [2 x %struct.S]* [[TMP2]] to %struct.S* -// CHECK8-NEXT: [[TMP10:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 -// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP10]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK8-NEXT: store i32 [[TMP12]], i32* [[T_VAR]], align 4 +// CHECK8-NEXT: [[TMP13:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK8-NEXT: [[TMP14:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 8, i1 false) +// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP15:%.*]] = bitcast [2 x %struct.S]* [[TMP6]] to %struct.S* +// CHECK8-NEXT: [[TMP16:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP16]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK8: omp.arraycpy.body: -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP9]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP15]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK8-NEXT: [[TMP11:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK8-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 4, i1 false) +// CHECK8-NEXT: [[TMP17:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK8-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP17]], i8* align 4 [[TMP18]], i64 4, i1 false) // CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP10]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK8: omp.arraycpy.done6: -// CHECK8-NEXT: [[TMP13:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 8 -// CHECK8-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[VAR7]] to i8* -// CHECK8-NEXT: [[TMP15:%.*]] = bitcast %struct.S* [[TMP13]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i64 4, i1 false) -// CHECK8-NEXT: store %struct.S* [[VAR7]], %struct.S** [[_TMP8]], align 8 -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK8-NEXT: store i32 [[TMP16]], i32* [[SVAR9]], align 4 -// CHECK8-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP18]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK8-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP16]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK8: omp.arraycpy.done3: +// CHECK8-NEXT: [[TMP19:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 8 +// CHECK8-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK8-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[TMP19]] to i8* +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) +// CHECK8-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP4]], align 8 +// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK8-NEXT: store i32 [[TMP22]], i32* [[SVAR]], align 4 +// CHECK8-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP24]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK8-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP25]], 1 // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: br label [[COND_END:%.*]] // CHECK8: cond.false: -// CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK8-NEXT: br label [[COND_END]] // CHECK8: cond.end: -// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] +// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP26]], [[COND_FALSE]] ] // CHECK8-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK8-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK8-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] -// CHECK8-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK8-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] +// CHECK8-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK8-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK8-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[TMP27:%.*]] = zext i32 [[TMP26]] to i64 -// CHECK8-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK8-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK8-NEXT: store i32 [[TMP28]], i32* [[CONV]], align 4 -// CHECK8-NEXT: [[TMP29:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK8-NEXT: [[TMP30:%.*]] = load %struct.S*, %struct.S** [[_TMP8]], align 8 -// CHECK8-NEXT: [[TMP31:%.*]] = load i32, i32* [[SVAR9]], align 4 -// CHECK8-NEXT: [[CONV11:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* -// CHECK8-NEXT: store i32 [[TMP31]], i32* [[CONV11]], align 4 -// CHECK8-NEXT: [[TMP32:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP25]], i64 [[TMP27]], [2 x i32]* [[VEC4]], i64 [[TMP29]], [2 x %struct.S]* [[S_ARR5]], %struct.S* [[TMP30]], i64 [[TMP32]]) +// CHECK8-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK8-NEXT: [[TMP31:%.*]] = zext i32 [[TMP30]] to i64 +// CHECK8-NEXT: store i64 [[TMP31]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK8-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP33:%.*]] = zext i32 [[TMP32]] to i64 +// CHECK8-NEXT: store i64 [[TMP33]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK8-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK8-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP34]], align 8 +// CHECK8-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK8-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP35]], align 8 +// CHECK8-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK8-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP36]], align 8 +// CHECK8-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP38:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK8-NEXT: store i32 [[TMP38]], i32* [[TMP37]], align 8 +// CHECK8-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK8-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP39]], align 8 +// CHECK8-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK8-NEXT: [[TMP41:%.*]] = load %struct.S*, %struct.S** [[_TMP4]], align 8 +// CHECK8-NEXT: store %struct.S* [[TMP41]], %struct.S** [[TMP40]], align 8 +// CHECK8-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK8-NEXT: [[TMP43:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK8-NEXT: store i32 [[TMP43]], i32* [[TMP42]], align 8 +// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK8-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] // CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: -// CHECK8-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) -// CHECK8-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN12]], i64 2 +// CHECK8-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]]) +// CHECK8-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK8-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i64 2 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP37]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP48]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK8-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK8: arraydestroy.done13: +// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK8: arraydestroy.done7: // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK8-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK8-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK8-NEXT: entry: // CHECK8-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK8-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK8-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK8-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK8-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK8-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK8-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 // CHECK8-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK8-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[VEC5:%.*]] = alloca [2 x i32], align 4 -// CHECK8-NEXT: [[S_ARR6:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK8-NEXT: [[VAR8:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK8-NEXT: [[_TMP9:%.*]] = alloca %struct.S*, align 8 +// CHECK8-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK8-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK8-NEXT: [[_TMP4:%.*]] = alloca %struct.S*, align 8 // CHECK8-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK8-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK8-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK8-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK8-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK8-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK8-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK8-NEXT: store i64 [[SVAR]], i64* [[SVAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK8-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK8-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK8-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* -// CHECK8-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 +// CHECK8-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK8-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK8-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 8 +// CHECK8-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK8-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK8-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK8-NEXT: [[TMP10:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP9]], align 8 +// CHECK8-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK8-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[TMP11]], align 8 +// CHECK8-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 7 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 8 +// CHECK8-NEXT: store i32 [[TMP14]], i32* [[SVAR]], align 4 +// CHECK8-NEXT: store %struct.S* [[TMP12]], %struct.S** [[TMP]], align 8 // CHECK8-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK8-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK8-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK8-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK8-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK8-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK8-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK8-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK8-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK8-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP16]] to i32 +// CHECK8-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK8-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK8-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK8-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC5]] to i8* -// CHECK8-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i64 8, i1 false) -// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR6]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK8-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 -// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK8-NEXT: [[TMP17:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK8-NEXT: [[TMP18:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP17]], i8* align 4 [[TMP18]], i64 8, i1 false) +// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP19:%.*]] = bitcast [2 x %struct.S]* [[TMP10]] to %struct.S* +// CHECK8-NEXT: [[TMP20:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP20]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK8: omp.arraycpy.body: -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP19]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK8-NEXT: [[TMP9:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK8-NEXT: [[TMP10:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP9]], i8* align 4 [[TMP10]], i64 4, i1 false) +// CHECK8-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK8-NEXT: [[TMP22:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i64 4, i1 false) // CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] -// CHECK8: omp.arraycpy.done7: -// CHECK8-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK8-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[VAR8]] to i8* -// CHECK8-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[TMP11]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 4, i1 false) -// CHECK8-NEXT: store %struct.S* [[VAR8]], %struct.S** [[_TMP9]], align 8 -// CHECK8-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP15]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP20]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK8: omp.arraycpy.done3: +// CHECK8-NEXT: [[TMP23:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK8-NEXT: [[TMP24:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK8-NEXT: [[TMP25:%.*]] = bitcast %struct.S* [[TMP23]] to i8* +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP24]], i8* align 4 [[TMP25]], i64 4, i1 false) +// CHECK8-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP4]], align 8 +// CHECK8-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP27]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK8-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP28]], 1 // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: br label [[COND_END:%.*]] // CHECK8: cond.false: -// CHECK8-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK8-NEXT: br label [[COND_END]] // CHECK8: cond.end: -// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP29]], [[COND_FALSE]] ] // CHECK8-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK8-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK8-NEXT: store i32 [[TMP30]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] -// CHECK8-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK8-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] +// CHECK8-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK8-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 -// CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC5]], i64 0, i64 [[IDXPROM]] -// CHECK8-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4 -// CHECK8-NEXT: [[TMP24:%.*]] = load %struct.S*, %struct.S** [[_TMP9]], align 8 -// CHECK8-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 -// CHECK8-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP25]] to i64 -// CHECK8-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR6]], i64 0, i64 [[IDXPROM11]] -// CHECK8-NEXT: [[TMP26:%.*]] = bitcast %struct.S* [[ARRAYIDX12]] to i8* -// CHECK8-NEXT: [[TMP27:%.*]] = bitcast %struct.S* [[TMP24]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 4, i1 false) +// CHECK8-NEXT: [[TMP34:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK8-NEXT: [[TMP35:%.*]] = load i32, i32* [[I]], align 4 +// CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK8-NEXT: store i32 [[TMP34]], i32* [[ARRAYIDX]], align 4 +// CHECK8-NEXT: [[TMP36:%.*]] = load %struct.S*, %struct.S** [[_TMP4]], align 8 +// CHECK8-NEXT: [[TMP37:%.*]] = load i32, i32* [[I]], align 4 +// CHECK8-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK8-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK8-NEXT: [[TMP38:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* +// CHECK8-NEXT: [[TMP39:%.*]] = bitcast %struct.S* [[TMP36]] to i8* +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i64 4, i1 false) // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK8-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK8-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: -// CHECK8-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK8-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR8]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR6]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN14]], i64 2 +// CHECK8-NEXT: [[TMP41:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP42:%.*]] = load i32, i32* [[TMP41]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP42]]) +// CHECK8-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK8-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN9]], i64 2 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP31]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP43]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK8-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK8: arraydestroy.done15: +// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK8: arraydestroy.done10: // CHECK8-NEXT: ret void // // @@ -1236,35 +1294,35 @@ // CHECK8-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK8-NEXT: entry: // CHECK8-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK8-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK8-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK8-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK8-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK8-NEXT: [[VAR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK8-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 8 // CHECK8-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 // CHECK8-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x i8*], align 8 // CHECK8-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x i8*], align 8 // CHECK8-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x i8*], align 8 // CHECK8-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK8-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK8-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK8-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) -// CHECK8-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK8-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK8-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK8-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) -// CHECK8-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 8 -// CHECK8-NEXT: [[TMP1:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR]], align 8 -// CHECK8-NEXT: store %struct.S.0* [[TMP1]], %struct.S.0** [[TMP]], align 8 +// CHECK8-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 0 +// CHECK8-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) +// CHECK8-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK8-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) +// CHECK8-NEXT: store %struct.S.1* [[TEST]], %struct.S.1** [[VAR]], align 8 +// CHECK8-NEXT: [[TMP1:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR]], align 8 +// CHECK8-NEXT: store %struct.S.1* [[TMP1]], %struct.S.1** [[TMP]], align 8 // CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_VAR]], align 4 // CHECK8-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK8-NEXT: store i32 [[TMP2]], i32* [[CONV]], align 4 // CHECK8-NEXT: [[TMP3:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK8-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK8-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK8-NEXT: [[TMP6:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK8-NEXT: [[TMP4:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 +// CHECK8-NEXT: [[TMP6:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 // CHECK8-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK8-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64* // CHECK8-NEXT: store i64 [[TMP3]], i64* [[TMP8]], align 8 @@ -1282,19 +1340,19 @@ // CHECK8-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 // CHECK8-NEXT: store i8* null, i8** [[TMP16]], align 8 // CHECK8-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK8-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.0]** -// CHECK8-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP18]], align 8 +// CHECK8-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.1]** +// CHECK8-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP18]], align 8 // CHECK8-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK8-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to [2 x %struct.S.0]** -// CHECK8-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP20]], align 8 +// CHECK8-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to [2 x %struct.S.1]** +// CHECK8-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP20]], align 8 // CHECK8-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 // CHECK8-NEXT: store i8* null, i8** [[TMP21]], align 8 // CHECK8-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK8-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.0** -// CHECK8-NEXT: store %struct.S.0* [[TMP5]], %struct.S.0** [[TMP23]], align 8 +// CHECK8-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.1** +// CHECK8-NEXT: store %struct.S.1* [[TMP5]], %struct.S.1** [[TMP23]], align 8 // CHECK8-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK8-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to %struct.S.0** -// CHECK8-NEXT: store %struct.S.0* [[TMP6]], %struct.S.0** [[TMP25]], align 8 +// CHECK8-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to %struct.S.1** +// CHECK8-NEXT: store %struct.S.1* [[TMP6]], %struct.S.1** [[TMP25]], align 8 // CHECK8-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 // CHECK8-NEXT: store i8* null, i8** [[TMP26]], align 8 // CHECK8-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 @@ -1304,21 +1362,21 @@ // CHECK8-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 // CHECK8-NEXT: br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK8: omp_offload.failed: -// CHECK8-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l48(i64 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP4]]) #[[ATTR4]] +// CHECK8-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l48(i64 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.1]* [[S_ARR]], %struct.S.1* [[TMP4]]) #[[ATTR4]] // CHECK8-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK8: omp_offload.cont: // CHECK8-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK8-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK8-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK8: arraydestroy.done2: -// CHECK8-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK8-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK8-NEXT: [[TMP32:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK8-NEXT: ret i32 [[TMP32]] // @@ -1358,359 +1416,381 @@ // // // CHECK8-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK8-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK8-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK8-NEXT: entry: -// CHECK8-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK8-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK8-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK8-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK8-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK8-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK8-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK8-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK8-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK8-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK8-NEXT: entry: -// CHECK8-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK8-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK8-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK8-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK8-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK8-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK8-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK8-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK8-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) +// CHECK8-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l48 -// CHECK8-SAME: (i64 noundef [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK8-SAME: (i64 noundef [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.1]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { // CHECK8-NEXT: entry: // CHECK8-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 // CHECK8-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK8-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK8-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK8-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.1]*, align 8 +// CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK8-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 8 +// CHECK8-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK8-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK8-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK8-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 +// CHECK8-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[S_ARR_ADDR]], align 8 +// CHECK8-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[VAR_ADDR]], align 8 // CHECK8-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* // CHECK8-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK8-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK8-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK8-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[CONV]], [2 x i32]* [[TMP0]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP3]]) +// CHECK8-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[S_ARR_ADDR]], align 8 +// CHECK8-NEXT: [[TMP2:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR_ADDR]], align 8 +// CHECK8-NEXT: store %struct.S.1* [[TMP2]], %struct.S.1** [[TMP]], align 8 +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK8-NEXT: store i32* [[CONV]], i32** [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK8-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP4]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK8-NEXT: store [2 x %struct.S.1]* [[TMP1]], [2 x %struct.S.1]** [[TMP5]], align 8 +// CHECK8-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP7:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 +// CHECK8-NEXT: store %struct.S.1* [[TMP7]], %struct.S.1** [[TMP6]], align 8 +// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK8-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK8-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK8-NEXT: entry: // CHECK8-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK8-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK8-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK8-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK8-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK8-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 -// CHECK8-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 +// CHECK8-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK8-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 8 +// CHECK8-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 8 // CHECK8-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK8-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK8-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK8-NEXT: [[_TMP8:%.*]] = alloca %struct.S.0*, align 8 +// CHECK8-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK8-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK8-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK8-NEXT: [[_TMP4:%.*]] = alloca %struct.S.1*, align 8 // CHECK8-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK8-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK8-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK8-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK8-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK8-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK8-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 8 -// CHECK8-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK8-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[_TMP1]], align 8 +// CHECK8-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK8-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK8-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP5]], align 8 +// CHECK8-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP7]], align 8 +// CHECK8-NEXT: store %struct.S.1* [[TMP8]], %struct.S.1** [[TMP]], align 8 +// CHECK8-NEXT: [[TMP9:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 +// CHECK8-NEXT: store %struct.S.1* [[TMP9]], %struct.S.1** [[_TMP1]], align 8 // CHECK8-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK8-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK8-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK8-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK8-NEXT: store i32 [[TMP5]], i32* [[T_VAR3]], align 4 -// CHECK8-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK8-NEXT: [[TMP7:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP6]], i8* align 4 [[TMP7]], i64 8, i1 false) -// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP8:%.*]] = bitcast [2 x %struct.S.0]* [[TMP2]] to %struct.S.0* -// CHECK8-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 -// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP9]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK8-NEXT: store i32 [[TMP10]], i32* [[T_VAR]], align 4 +// CHECK8-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK8-NEXT: [[TMP12:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 8, i1 false) +// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S.1]* [[TMP6]] to %struct.S.1* +// CHECK8-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 +// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK8: omp.arraycpy.body: -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP8]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK8-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK8-NEXT: [[TMP10:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK8-NEXT: [[TMP11:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP10]], i8* align 4 [[TMP11]], i64 4, i1 false) -// CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK8: omp.arraycpy.done6: -// CHECK8-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP1]], align 8 -// CHECK8-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[VAR7]] to i8* -// CHECK8-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 4, i1 false) -// CHECK8-NEXT: store %struct.S.0* [[VAR7]], %struct.S.0** [[_TMP8]], align 8 -// CHECK8-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK8-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK8-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK8-NEXT: [[TMP15:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK8-NEXT: [[TMP16:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP15]], i8* align 4 [[TMP16]], i64 4, i1 false) +// CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK8: omp.arraycpy.done3: +// CHECK8-NEXT: [[TMP17:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP1]], align 8 +// CHECK8-NEXT: [[TMP18:%.*]] = bitcast %struct.S.1* [[VAR]] to i8* +// CHECK8-NEXT: [[TMP19:%.*]] = bitcast %struct.S.1* [[TMP17]] to i8* +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false) +// CHECK8-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP4]], align 8 +// CHECK8-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP22]], 1 // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: br label [[COND_END:%.*]] // CHECK8: cond.false: -// CHECK8-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK8-NEXT: br label [[COND_END]] // CHECK8: cond.end: -// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] +// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK8-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK8-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK8-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK8-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK8-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK8-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK8-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK8-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK8-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK8-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK8-NEXT: store i32 [[TMP26]], i32* [[CONV]], align 4 -// CHECK8-NEXT: [[TMP27:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK8-NEXT: [[TMP28:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 8 -// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP23]], i64 [[TMP25]], [2 x i32]* [[VEC4]], i64 [[TMP27]], [2 x %struct.S.0]* [[S_ARR5]], %struct.S.0* [[TMP28]]) +// CHECK8-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK8-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK8-NEXT: store i64 [[TMP28]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK8-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP30:%.*]] = zext i32 [[TMP29]] to i64 +// CHECK8-NEXT: store i64 [[TMP30]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK8-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK8-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP31]], align 8 +// CHECK8-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK8-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP32]], align 8 +// CHECK8-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK8-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP33]], align 8 +// CHECK8-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP35:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK8-NEXT: store i32 [[TMP35]], i32* [[TMP34]], align 8 +// CHECK8-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK8-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP36]], align 8 +// CHECK8-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK8-NEXT: [[TMP38:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP4]], align 8 +// CHECK8-NEXT: store %struct.S.1* [[TMP38]], %struct.S.1** [[TMP37]], align 8 +// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK8-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: -// CHECK8-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) -// CHECK8-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN10]], i64 2 +// CHECK8-NEXT: [[TMP41:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP42:%.*]] = load i32, i32* [[TMP41]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP42]]) +// CHECK8-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK8-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN6]], i64 2 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP33]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK8-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK8: arraydestroy.done11: +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP43]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK8-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK8: arraydestroy.done7: // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK8-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK8-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK8-NEXT: entry: // CHECK8-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK8-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK8-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK8-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK8-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK8-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK8-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK8-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 8 // CHECK8-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK8-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK8-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK8-NEXT: [[_TMP8:%.*]] = alloca %struct.S.0*, align 8 +// CHECK8-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK8-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK8-NEXT: [[_TMP4:%.*]] = alloca %struct.S.1*, align 8 // CHECK8-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK8-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK8-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK8-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK8-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK8-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK8-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK8-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK8-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK8-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 +// CHECK8-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK8-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK8-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 8 +// CHECK8-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK8-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK8-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK8-NEXT: [[TMP10:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP9]], align 8 +// CHECK8-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 5 +// CHECK8-NEXT: [[TMP12:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP11]], align 8 +// CHECK8-NEXT: store %struct.S.1* [[TMP12]], %struct.S.1** [[TMP]], align 8 // CHECK8-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK8-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK8-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK8-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK8-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK8-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_LB]], align 4 -// CHECK8-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK8-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK8-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK8-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK8-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK8-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK8-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK8-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK8-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i64 8, i1 false) -// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK8-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 -// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK8-NEXT: [[TMP15:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK8-NEXT: [[TMP16:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP15]], i8* align 4 [[TMP16]], i64 8, i1 false) +// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP17:%.*]] = bitcast [2 x %struct.S.1]* [[TMP10]] to %struct.S.1* +// CHECK8-NEXT: [[TMP18:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 +// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN]], [[TMP18]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK8: omp.arraycpy.body: -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK8-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK8-NEXT: [[TMP9:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK8-NEXT: [[TMP10:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP9]], i8* align 4 [[TMP10]], i64 4, i1 false) -// CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK8: omp.arraycpy.done6: -// CHECK8-NEXT: [[TMP11:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK8-NEXT: [[TMP12:%.*]] = bitcast %struct.S.0* [[VAR7]] to i8* -// CHECK8-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[TMP11]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 4, i1 false) -// CHECK8-NEXT: store %struct.S.0* [[VAR7]], %struct.S.0** [[_TMP8]], align 8 -// CHECK8-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP15]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP17]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK8-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK8-NEXT: [[TMP19:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK8-NEXT: [[TMP20:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i64 4, i1 false) +// CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP18]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK8: omp.arraycpy.done3: +// CHECK8-NEXT: [[TMP21:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 +// CHECK8-NEXT: [[TMP22:%.*]] = bitcast %struct.S.1* [[VAR]] to i8* +// CHECK8-NEXT: [[TMP23:%.*]] = bitcast %struct.S.1* [[TMP21]] to i8* +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false) +// CHECK8-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP4]], align 8 +// CHECK8-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK8-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP26]], 1 // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: br label [[COND_END:%.*]] // CHECK8: cond.false: -// CHECK8-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK8-NEXT: br label [[COND_END]] // CHECK8: cond.end: -// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK8-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK8-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK8-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] -// CHECK8-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK8-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK8-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK8-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 -// CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK8-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4 -// CHECK8-NEXT: [[TMP24:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 8 -// CHECK8-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 -// CHECK8-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP25]] to i64 -// CHECK8-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK8-NEXT: [[TMP26:%.*]] = bitcast %struct.S.0* [[ARRAYIDX11]] to i8* -// CHECK8-NEXT: [[TMP27:%.*]] = bitcast %struct.S.0* [[TMP24]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 4, i1 false) +// CHECK8-NEXT: [[TMP32:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK8-NEXT: [[TMP33:%.*]] = load i32, i32* [[I]], align 4 +// CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK8-NEXT: store i32 [[TMP32]], i32* [[ARRAYIDX]], align 4 +// CHECK8-NEXT: [[TMP34:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP4]], align 8 +// CHECK8-NEXT: [[TMP35:%.*]] = load i32, i32* [[I]], align 4 +// CHECK8-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK8-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK8-NEXT: [[TMP36:%.*]] = bitcast %struct.S.1* [[ARRAYIDX7]] to i8* +// CHECK8-NEXT: [[TMP37:%.*]] = bitcast %struct.S.1* [[TMP34]] to i8* +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP36]], i8* align 4 [[TMP37]], i64 4, i1 false) // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK8-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP38]], 1 +// CHECK8-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: -// CHECK8-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK8-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN13]], i64 2 +// CHECK8-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP40]]) +// CHECK8-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK8-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN9]], i64 2 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK8-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK8: arraydestroy.done14: +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP41]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK8-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK8: arraydestroy.done10: // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK8-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK8-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK8-NEXT: entry: -// CHECK8-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK8-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK8-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK8-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK8-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK8-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK8-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK8-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK8-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK8-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK8-NEXT: entry: -// CHECK8-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK8-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK8-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK8-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK8-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK8-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK8-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK8-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK8-NEXT: store i32 0, i32* [[F]], align 4 // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK8-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK8-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK8-NEXT: entry: -// CHECK8-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK8-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK8-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK8-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK8-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK8-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK8-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK8-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK8-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK8-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK8-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK8-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK8-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK8-NEXT: entry: -// CHECK8-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK8-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK8-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK8-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK8-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK8-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK8-NEXT: ret void // // @@ -1860,6 +1940,7 @@ // CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 // CHECK10-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK10-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK10-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK10-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 @@ -1869,21 +1950,27 @@ // CHECK10-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 // CHECK10-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[T_VAR_ADDR]], [2 x i32]* [[TMP0]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP3]], i32* [[SVAR_ADDR]]) +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: store i32* [[T_VAR_ADDR]], i32** [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP4]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK10-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK10-NEXT: store %struct.S* [[TMP7]], %struct.S** [[TMP6]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK10-NEXT: store i32* [[SVAR_ADDR]], i32** [[TMP8]], align 4 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK10-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK10-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK10-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK10-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 4 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 // CHECK10-NEXT: [[_TMP1:%.*]] = alloca %struct.S*, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1892,136 +1979,147 @@ // CHECK10-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK10-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK10-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK10-NEXT: [[_TMP8:%.*]] = alloca %struct.S*, align 4 -// CHECK10-NEXT: [[SVAR9:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK10-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK10-NEXT: [[_TMP4:%.*]] = alloca %struct.S*, align 4 +// CHECK10-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK10-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK10-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK10-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 4 -// CHECK10-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK10-NEXT: store %struct.S* [[TMP5]], %struct.S** [[_TMP1]], align 4 +// CHECK10-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK10-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK10-NEXT: store %struct.S* [[TMP11]], %struct.S** [[_TMP1]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK10-NEXT: store i32 [[TMP6]], i32* [[T_VAR3]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK10-NEXT: [[TMP8:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP7]], i8* align 4 [[TMP8]], i32 8, i1 false) -// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP9:%.*]] = bitcast [2 x %struct.S]* [[TMP2]] to %struct.S* -// CHECK10-NEXT: [[TMP10:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 -// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP10]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK10-NEXT: store i32 [[TMP12]], i32* [[T_VAR]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK10-NEXT: [[TMP14:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 8, i1 false) +// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP15:%.*]] = bitcast [2 x %struct.S]* [[TMP6]] to %struct.S* +// CHECK10-NEXT: [[TMP16:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 +// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP16]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK10: omp.arraycpy.body: -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP9]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP15]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK10-NEXT: [[TMP11:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK10-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 4, i1 false) +// CHECK10-NEXT: [[TMP17:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK10-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP17]], i8* align 4 [[TMP18]], i32 4, i1 false) // CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP10]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK10: omp.arraycpy.done6: -// CHECK10-NEXT: [[TMP13:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 4 -// CHECK10-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[VAR7]] to i8* -// CHECK10-NEXT: [[TMP15:%.*]] = bitcast %struct.S* [[TMP13]] to i8* -// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i32 4, i1 false) -// CHECK10-NEXT: store %struct.S* [[VAR7]], %struct.S** [[_TMP8]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK10-NEXT: store i32 [[TMP16]], i32* [[SVAR9]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP18]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP16]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK10: omp.arraycpy.done3: +// CHECK10-NEXT: [[TMP19:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK10-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[TMP19]] to i8* +// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false) +// CHECK10-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP4]], align 4 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK10-NEXT: store i32 [[TMP22]], i32* [[SVAR]], align 4 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP24]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP25]], 1 // CHECK10-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP26]], [[COND_FALSE]] ] // CHECK10-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] -// CHECK10-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] +// CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK10: omp.inner.for.cond.cleanup: // CHECK10-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK10-NEXT: store i32 [[TMP26]], i32* [[T_VAR_CASTED]], align 4 -// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK10-NEXT: [[TMP28:%.*]] = load %struct.S*, %struct.S** [[_TMP8]], align 4 -// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[SVAR9]], align 4 -// CHECK10-NEXT: store i32 [[TMP29]], i32* [[SVAR_CASTED]], align 4 -// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP24]], i32 [[TMP25]], [2 x i32]* [[VEC4]], i32 [[TMP27]], [2 x %struct.S]* [[S_ARR5]], %struct.S* [[TMP28]], i32 [[TMP30]]) +// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP30]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: store i32 [[TMP31]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK10-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP32]], align 4 +// CHECK10-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP33]], align 4 +// CHECK10-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK10-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP34]], align 4 +// CHECK10-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP36:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK10-NEXT: store i32 [[TMP36]], i32* [[TMP35]], align 4 +// CHECK10-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK10-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP37]], align 4 +// CHECK10-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK10-NEXT: [[TMP39:%.*]] = load %struct.S*, %struct.S** [[_TMP4]], align 4 +// CHECK10-NEXT: store %struct.S* [[TMP39]], %struct.S** [[TMP38]], align 4 +// CHECK10-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK10-NEXT: [[TMP41:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK10-NEXT: store i32 [[TMP41]], i32* [[TMP40]], align 4 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK10-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) -// CHECK10-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN11]], i32 2 +// CHECK10-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP45]]) +// CHECK10-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK10-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i32 2 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP35]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP46]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK10-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK10: arraydestroy.done12: +// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK10: arraydestroy.done7: // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK10-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK10-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK10-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK10-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2029,120 +2127,128 @@ // CHECK10-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK10-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK10-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK10-NEXT: [[_TMP6:%.*]] = alloca %struct.S*, align 4 +// CHECK10-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK10-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK10-NEXT: [[_TMP3:%.*]] = alloca %struct.S*, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK10-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK10-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK10-NEXT: store i32 [[SVAR]], i32* [[SVAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK10-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 4 +// CHECK10-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK10-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP9]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK10-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[TMP11]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK10-NEXT: store i32 [[TMP14]], i32* [[SVAR]], align 4 +// CHECK10-NEXT: store %struct.S* [[TMP12]], %struct.S** [[TMP]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK10-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK10-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i32 8, i1 false) -// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK10-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 -// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK10-NEXT: [[TMP17:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK10-NEXT: [[TMP18:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP17]], i8* align 4 [[TMP18]], i32 8, i1 false) +// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP19:%.*]] = bitcast [2 x %struct.S]* [[TMP10]] to %struct.S* +// CHECK10-NEXT: [[TMP20:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 +// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP20]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK10: omp.arraycpy.body: -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP19]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK10-NEXT: [[TMP9:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK10-NEXT: [[TMP10:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 [[TMP10]], i32 4, i1 false) +// CHECK10-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK10-NEXT: [[TMP22:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 4, i1 false) // CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK10: omp.arraycpy.done4: -// CHECK10-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[VAR5]] to i8* -// CHECK10-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[TMP11]] to i8* -// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 4, i1 false) -// CHECK10-NEXT: store %struct.S* [[VAR5]], %struct.S** [[_TMP6]], align 4 -// CHECK10-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP15]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP20]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK10: omp.arraycpy.done2: +// CHECK10-NEXT: [[TMP23:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK10-NEXT: [[TMP24:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK10-NEXT: [[TMP25:%.*]] = bitcast %struct.S* [[TMP23]] to i8* +// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP24]], i8* align 4 [[TMP25]], i32 4, i1 false) +// CHECK10-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP3]], align 4 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP27]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP28]], 1 // CHECK10-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP29]], [[COND_FALSE]] ] // CHECK10-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP30]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] -// CHECK10-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] +// CHECK10-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK10: omp.inner.for.cond.cleanup: // CHECK10-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK10-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 -// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP23]] -// CHECK10-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4 -// CHECK10-NEXT: [[TMP24:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 -// CHECK10-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 [[TMP25]] -// CHECK10-NEXT: [[TMP26:%.*]] = bitcast %struct.S* [[ARRAYIDX8]] to i8* -// CHECK10-NEXT: [[TMP27:%.*]] = bitcast %struct.S* [[TMP24]] to i8* -// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 4, i1 false) +// CHECK10-NEXT: [[TMP34:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK10-NEXT: [[TMP35:%.*]] = load i32, i32* [[I]], align 4 +// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP35]] +// CHECK10-NEXT: store i32 [[TMP34]], i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: [[TMP36:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 4 +// CHECK10-NEXT: [[TMP37:%.*]] = load i32, i32* [[I]], align 4 +// CHECK10-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP37]] +// CHECK10-NEXT: [[TMP38:%.*]] = bitcast %struct.S* [[ARRAYIDX5]] to i8* +// CHECK10-NEXT: [[TMP39:%.*]] = bitcast %struct.S* [[TMP36]] to i8* +// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i32 4, i1 false) // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK10-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK10-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK10-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i32 2 +// CHECK10-NEXT: [[TMP41:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP42:%.*]] = load i32, i32* [[TMP41]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP42]]) +// CHECK10-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK10-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i32 2 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP31]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP43]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK10-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK10: arraydestroy.done11: +// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK10: arraydestroy.done8: // CHECK10-NEXT: ret void // // @@ -2160,34 +2266,34 @@ // CHECK10-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK10-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK10-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK10-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK10-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK10-NEXT: [[VAR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK10-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK10-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x i8*], align 4 // CHECK10-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x i8*], align 4 // CHECK10-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x i8*], align 4 // CHECK10-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK10-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK10-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK10-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i32 8, i1 false) -// CHECK10-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK10-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK10-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i32 1 -// CHECK10-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) -// CHECK10-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR]], align 4 -// CHECK10-NEXT: store %struct.S.0* [[TMP1]], %struct.S.0** [[TMP]], align 4 +// CHECK10-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK10-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i32 1 +// CHECK10-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK10-NEXT: store %struct.S.1* [[TEST]], %struct.S.1** [[VAR]], align 4 +// CHECK10-NEXT: [[TMP1:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR]], align 4 +// CHECK10-NEXT: store %struct.S.1* [[TMP1]], %struct.S.1** [[TMP]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_VAR]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[T_VAR_CASTED]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK10-NEXT: [[TMP4:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 // CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK10-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[TMP8]], align 4 @@ -2205,19 +2311,19 @@ // CHECK10-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 // CHECK10-NEXT: store i8* null, i8** [[TMP16]], align 4 // CHECK10-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK10-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.0]** -// CHECK10-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP18]], align 4 +// CHECK10-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.1]** +// CHECK10-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP18]], align 4 // CHECK10-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK10-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to [2 x %struct.S.0]** -// CHECK10-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP20]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to [2 x %struct.S.1]** +// CHECK10-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP20]], align 4 // CHECK10-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 // CHECK10-NEXT: store i8* null, i8** [[TMP21]], align 4 // CHECK10-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK10-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.0** -// CHECK10-NEXT: store %struct.S.0* [[TMP5]], %struct.S.0** [[TMP23]], align 4 +// CHECK10-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.1** +// CHECK10-NEXT: store %struct.S.1* [[TMP5]], %struct.S.1** [[TMP23]], align 4 // CHECK10-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK10-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to %struct.S.0** -// CHECK10-NEXT: store %struct.S.0* [[TMP6]], %struct.S.0** [[TMP25]], align 4 +// CHECK10-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to %struct.S.1** +// CHECK10-NEXT: store %struct.S.1* [[TMP6]], %struct.S.1** [[TMP25]], align 4 // CHECK10-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 // CHECK10-NEXT: store i8* null, i8** [[TMP26]], align 4 // CHECK10-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 @@ -2227,21 +2333,21 @@ // CHECK10-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 // CHECK10-NEXT: br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK10: omp_offload.failed: -// CHECK10-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l48(i32 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP4]]) #[[ATTR4]] +// CHECK10-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l48(i32 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.1]* [[S_ARR]], %struct.S.1* [[TMP4]]) #[[ATTR4]] // CHECK10-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK10: omp_offload.cont: // CHECK10-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK10-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK10-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK10: arraydestroy.done2: -// CHECK10-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK10-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK10-NEXT: ret i32 [[TMP32]] // @@ -2281,350 +2387,374 @@ // // // CHECK10-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK10-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK10-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK10-NEXT: entry: -// CHECK10-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK10-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK10-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK10-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK10-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK10-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK10-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK10-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK10-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK10-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK10-NEXT: entry: -// CHECK10-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK10-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK10-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK10-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK10-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK10-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK10-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK10-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK10-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l48 -// CHECK10-SAME: (i32 noundef [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK10-SAME: (i32 noundef [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.1]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK10-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK10-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK10-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.1]*, align 4 +// CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK10-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK10-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK10-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK10-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 +// CHECK10-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[S_ARR_ADDR]], align 4 +// CHECK10-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[VAR_ADDR]], align 4 // CHECK10-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK10-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[T_VAR_ADDR]], [2 x i32]* [[TMP0]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP3]]) +// CHECK10-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[S_ARR_ADDR]], align 4 +// CHECK10-NEXT: [[TMP2:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR_ADDR]], align 4 +// CHECK10-NEXT: store %struct.S.1* [[TMP2]], %struct.S.1** [[TMP]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: store i32* [[T_VAR_ADDR]], i32** [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP4]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK10-NEXT: store [2 x %struct.S.1]* [[TMP1]], [2 x %struct.S.1]** [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP7:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK10-NEXT: store %struct.S.1* [[TMP7]], %struct.S.1** [[TMP6]], align 4 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK10-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK10-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK10-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK10-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 -// CHECK10-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK10-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 +// CHECK10-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK10-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK10-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK10-NEXT: [[_TMP8:%.*]] = alloca %struct.S.0*, align 4 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK10-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK10-NEXT: [[_TMP4:%.*]] = alloca %struct.S.1*, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK10-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK10-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK10-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK10-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[_TMP1]], align 4 +// CHECK10-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP7]], align 4 +// CHECK10-NEXT: store %struct.S.1* [[TMP8]], %struct.S.1** [[TMP]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK10-NEXT: store %struct.S.1* [[TMP9]], %struct.S.1** [[_TMP1]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK10-NEXT: store i32 [[TMP5]], i32* [[T_VAR3]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK10-NEXT: [[TMP7:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 [[TMP7]], i32 8, i1 false) -// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP8:%.*]] = bitcast [2 x %struct.S.0]* [[TMP2]] to %struct.S.0* -// CHECK10-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 -// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP9]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK10-NEXT: store i32 [[TMP10]], i32* [[T_VAR]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK10-NEXT: [[TMP12:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 8, i1 false) +// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S.1]* [[TMP6]] to %struct.S.1* +// CHECK10-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 +// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK10: omp.arraycpy.body: -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP8]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK10-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK10-NEXT: [[TMP10:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK10-NEXT: [[TMP11:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP10]], i8* align 4 [[TMP11]], i32 4, i1 false) -// CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK10: omp.arraycpy.done6: -// CHECK10-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP1]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[VAR7]] to i8* -// CHECK10-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 4, i1 false) -// CHECK10-NEXT: store %struct.S.0* [[VAR7]], %struct.S.0** [[_TMP8]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK10-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK10-NEXT: [[TMP15:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK10-NEXT: [[TMP16:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP15]], i8* align 4 [[TMP16]], i32 4, i1 false) +// CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK10: omp.arraycpy.done3: +// CHECK10-NEXT: [[TMP17:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP1]], align 4 +// CHECK10-NEXT: [[TMP18:%.*]] = bitcast %struct.S.1* [[VAR]] to i8* +// CHECK10-NEXT: [[TMP19:%.*]] = bitcast %struct.S.1* [[TMP17]] to i8* +// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false) +// CHECK10-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP4]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP22]], 1 // CHECK10-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK10-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK10-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK10: omp.inner.for.cond.cleanup: // CHECK10-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK10-NEXT: store i32 [[TMP24]], i32* [[T_VAR_CASTED]], align 4 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK10-NEXT: [[TMP26:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 4 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP22]], i32 [[TMP23]], [2 x i32]* [[VEC4]], i32 [[TMP25]], [2 x %struct.S.0]* [[S_ARR5]], %struct.S.0* [[TMP26]]) +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP27]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: store i32 [[TMP28]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK10-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP29]], align 4 +// CHECK10-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP30]], align 4 +// CHECK10-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK10-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP31]], align 4 +// CHECK10-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP33:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK10-NEXT: store i32 [[TMP33]], i32* [[TMP32]], align 4 +// CHECK10-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK10-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP34]], align 4 +// CHECK10-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK10-NEXT: [[TMP36:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP4]], align 4 +// CHECK10-NEXT: store %struct.S.1* [[TMP36]], %struct.S.1** [[TMP35]], align 4 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK10-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK10-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN10]], i32 2 +// CHECK10-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP40]]) +// CHECK10-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK10-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN6]], i32 2 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK10-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK10: arraydestroy.done11: +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP41]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK10-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK10: arraydestroy.done7: // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK10-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK10-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK10-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK10-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK10-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK10-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK10-NEXT: [[_TMP6:%.*]] = alloca %struct.S.0*, align 4 +// CHECK10-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK10-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK10-NEXT: [[_TMP3:%.*]] = alloca %struct.S.1*, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK10-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK10-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK10-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 +// CHECK10-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK10-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP9]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 5 +// CHECK10-NEXT: [[TMP12:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP11]], align 4 +// CHECK10-NEXT: store %struct.S.1* [[TMP12]], %struct.S.1** [[TMP]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK10-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK10-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i32 8, i1 false) -// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK10-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 -// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK10-NEXT: [[TMP15:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK10-NEXT: [[TMP16:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP15]], i8* align 4 [[TMP16]], i32 8, i1 false) +// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP17:%.*]] = bitcast [2 x %struct.S.1]* [[TMP10]] to %struct.S.1* +// CHECK10-NEXT: [[TMP18:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 +// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN]], [[TMP18]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK10: omp.arraycpy.body: -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK10-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK10-NEXT: [[TMP9:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK10-NEXT: [[TMP10:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 [[TMP10]], i32 4, i1 false) -// CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK10: omp.arraycpy.done4: -// CHECK10-NEXT: [[TMP11:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = bitcast %struct.S.0* [[VAR5]] to i8* -// CHECK10-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[TMP11]] to i8* -// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 4, i1 false) -// CHECK10-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP6]], align 4 -// CHECK10-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP15]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP17]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK10-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK10-NEXT: [[TMP19:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK10-NEXT: [[TMP20:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i32 4, i1 false) +// CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP18]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK10: omp.arraycpy.done2: +// CHECK10-NEXT: [[TMP21:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK10-NEXT: [[TMP22:%.*]] = bitcast %struct.S.1* [[VAR]] to i8* +// CHECK10-NEXT: [[TMP23:%.*]] = bitcast %struct.S.1* [[TMP21]] to i8* +// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false) +// CHECK10-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP3]], align 4 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP26]], 1 // CHECK10-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK10-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] -// CHECK10-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK10-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK10: omp.inner.for.cond.cleanup: // CHECK10-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 -// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP23]] -// CHECK10-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4 -// CHECK10-NEXT: [[TMP24:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 -// CHECK10-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 [[TMP25]] -// CHECK10-NEXT: [[TMP26:%.*]] = bitcast %struct.S.0* [[ARRAYIDX8]] to i8* -// CHECK10-NEXT: [[TMP27:%.*]] = bitcast %struct.S.0* [[TMP24]] to i8* -// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 4, i1 false) +// CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK10-NEXT: [[TMP33:%.*]] = load i32, i32* [[I]], align 4 +// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP33]] +// CHECK10-NEXT: store i32 [[TMP32]], i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: [[TMP34:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP3]], align 4 +// CHECK10-NEXT: [[TMP35:%.*]] = load i32, i32* [[I]], align 4 +// CHECK10-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 [[TMP35]] +// CHECK10-NEXT: [[TMP36:%.*]] = bitcast %struct.S.1* [[ARRAYIDX5]] to i8* +// CHECK10-NEXT: [[TMP37:%.*]] = bitcast %struct.S.1* [[TMP34]] to i8* +// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 [[TMP37]], i32 4, i1 false) // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK10-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP38]], 1 +// CHECK10-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK10-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN10]], i32 2 +// CHECK10-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP40]]) +// CHECK10-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK10-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN7]], i32 2 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK10-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK10: arraydestroy.done11: +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP41]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK10-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK10: arraydestroy.done8: // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK10-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK10-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK10-NEXT: entry: -// CHECK10-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK10-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK10-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK10-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK10-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK10-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK10-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK10-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK10-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK10-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK10-NEXT: entry: -// CHECK10-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK10-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK10-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK10-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK10-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK10-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK10-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK10-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK10-NEXT: store i32 0, i32* [[F]], align 4 // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK10-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK10-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK10-NEXT: entry: -// CHECK10-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK10-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK10-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK10-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK10-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK10-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK10-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK10-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK10-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK10-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK10-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK10-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK10-NEXT: entry: -// CHECK10-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK10-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK10-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK10-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK10-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK10-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK10-NEXT: ret void // // diff --git a/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp @@ -147,77 +147,89 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l47 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -227,139 +239,155 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l52 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP14]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -369,43 +397,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: call void @_Z9gtid_testv() @@ -413,14 +445,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -482,77 +514,89 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l85 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -562,43 +606,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn4v() @@ -606,96 +654,108 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP14]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -705,43 +765,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn5v() @@ -749,14 +813,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -764,96 +828,109 @@ // CHECK1-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARG_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -863,43 +940,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn6v() @@ -907,14 +988,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -974,77 +1055,89 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l63 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1054,43 +1147,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn1v() @@ -1098,96 +1195,108 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l69 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..12 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.11*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TMP14]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1197,43 +1306,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn2v() @@ -1241,14 +1354,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -1256,96 +1369,109 @@ // CHECK1-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK1-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.13*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARG_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1355,43 +1481,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn3v() @@ -1399,14 +1529,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // diff --git a/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp @@ -230,6 +230,7 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], i64* [[SVAR_ADDR]], align 8 @@ -239,20 +240,25 @@ // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SFVAR_ADDR]] to float* // CHECK1-NEXT: store double* [[CONV1]], double** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, double*, double*, i32*, float*)* @.omp_outlined. to void (i32*, i32*, ...)*), double* [[CONV]], double* [[TMP0]], i32* [[CONV2]], float* [[CONV3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store double* [[CONV]], double** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[TMP2]], double** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[CONV2]], i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[CONV3]], float** [[TMP4]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[G:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -261,100 +267,114 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store double* [[G]], double** [[G_ADDR]], align 8 -// CHECK1-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store float* [[SFVAR]], float** [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float*, float** [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store double* [[TMP1]], double** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: store double* [[TMP4]], double** [[_TMP1]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 8 +// CHECK1-NEXT: store double* [[TMP4]], double** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[TMP9]], double** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load double*, double** [[_TMP1]], align 8 -// CHECK1-NEXT: store double* [[G14]], double** [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load double*, double** [[_TMP1]], align 8 +// CHECK1-NEXT: store double* [[G1]], double** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = load double*, double** [[_TMP5]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, double*, double*, i32*, float*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP14]], i64 [[TMP16]], double* [[G3]], double* [[TMP17]], i32* [[SVAR6]], float* [[SFVAR7]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i64 [[TMP19]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store double* [[G]], double** [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP26:%.*]] = load double*, double** [[_TMP3]], align 8 +// CHECK1-NEXT: store double* [[TMP26]], double** [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store i32* [[SVAR]], i32** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store float* [[SFVAR]], float** [[TMP28]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP22:%.*]] = load double, double* [[G3]], align 8 -// CHECK1-NEXT: store volatile double [[TMP22]], double* [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load double*, double** [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load double, double* [[TMP23]], align 8 -// CHECK1-NEXT: store volatile double [[TMP24]], double* [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[SVAR6]], align 4 -// CHECK1-NEXT: store i32 [[TMP25]], i32* [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load float, float* [[SFVAR7]], align 4 -// CHECK1-NEXT: store float [[TMP26]], float* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load double, double* [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP33]], double* [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load double*, double** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load double, double* [[TMP34]], align 8 +// CHECK1-NEXT: store volatile double [[TMP35]], double* [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP36]], i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load float, float* [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP37]], float* [[TMP8]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[G:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -362,105 +382,109 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store double* [[G]], double** [[G_ADDR]], align 8 -// CHECK1-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store float* [[SFVAR]], float** [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float*, float** [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store double* [[TMP1]], double** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double*, double** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP11]], align 8 +// CHECK1-NEXT: store double* [[TMP8]], double** [[TMP]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: store double* [[G14]], double** [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[G1]], double** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: store double 1.000000e+00, double* [[G3]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load double*, double** [[_TMP5]], align 8 -// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP15]], align 8 -// CHECK1-NEXT: store i32 3, i32* [[SVAR6]], align 4 -// CHECK1-NEXT: store float 4.000000e+00, float* [[SFVAR7]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double* [[G3]], double** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP18:%.*]] = load double*, double** [[_TMP5]], align 8 -// CHECK1-NEXT: store double* [[TMP18]], double** [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store i32* [[SVAR6]], i32** [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store float* [[SFVAR7]], float** [[TMP20]], align 8 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) +// CHECK1-NEXT: store double 1.000000e+00, double* [[G]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load double*, double** [[_TMP3]], align 8 +// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP24]], align 8 +// CHECK1-NEXT: store i32 3, i32* [[SVAR]], align 4 +// CHECK1-NEXT: store float 4.000000e+00, float* [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store double* [[G]], double** [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load double*, double** [[_TMP3]], align 8 +// CHECK1-NEXT: store double* [[TMP27]], double** [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[SVAR]], i32** [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[SFVAR]], float** [[TMP29]], align 8 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP24:%.*]] = load double, double* [[G3]], align 8 -// CHECK1-NEXT: store volatile double [[TMP24]], double* [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load double*, double** [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load double, double* [[TMP25]], align 8 -// CHECK1-NEXT: store volatile double [[TMP26]], double* [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[SVAR6]], align 4 -// CHECK1-NEXT: store i32 [[TMP27]], i32* [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load float, float* [[SFVAR7]], align 4 -// CHECK1-NEXT: store float [[TMP28]], float* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load double, double* [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP33]], double* [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load double*, double** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load double, double* [[TMP34]], align 8 +// CHECK1-NEXT: store volatile double [[TMP35]], double* [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP36]], i32* [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load float, float* [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP37]], float* [[TMP12]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -502,6 +526,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 // CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], i32* [[SVAR_ADDR]], align 4 @@ -516,20 +541,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load volatile double, double* [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], double* [[G13]], align 8 // CHECK3-NEXT: store double* [[G13]], double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, double*, double*, i32*, float*)* @.omp_outlined. to void (i32*, i32*, ...)*), double* [[G2]], double* [[TMP5]], i32* [[SVAR_ADDR]], float* [[CONV]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G2]], double** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load double*, double** [[_TMP4]], align 4 +// CHECK3-NEXT: store double* [[TMP7]], double** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SVAR_ADDR]], i32** [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[CONV]], float** [[TMP9]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca float*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -538,98 +568,112 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP5:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP3:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 -// CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store float* [[SFVAR]], float** [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load float*, float** [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store double* [[TMP1]], double** [[TMP]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP]], align 4 -// CHECK3-NEXT: store double* [[TMP4]], double** [[_TMP1]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 4 +// CHECK3-NEXT: store double* [[TMP4]], double** [[TMP]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load double*, double** [[TMP]], align 4 +// CHECK3-NEXT: store double* [[TMP9]], double** [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load double*, double** [[_TMP1]], align 4 -// CHECK3-NEXT: store double* [[G14]], double** [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load double*, double** [[_TMP1]], align 4 +// CHECK3-NEXT: store double* [[G1]], double** [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load double*, double** [[_TMP5]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, double*, double*, i32*, float*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP13]], i32 [[TMP14]], double* [[G3]], double* [[TMP15]], i32* [[SVAR6]], float* [[SFVAR7]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store double* [[G]], double** [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP24:%.*]] = load double*, double** [[_TMP3]], align 4 +// CHECK3-NEXT: store double* [[TMP24]], double** [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store i32* [[SVAR]], i32** [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store float* [[SFVAR]], float** [[TMP26]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK3-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP20:%.*]] = load double, double* [[G3]], align 8 -// CHECK3-NEXT: store volatile double [[TMP20]], double* [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP21:%.*]] = load double*, double** [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load double, double* [[TMP21]], align 4 -// CHECK3-NEXT: store volatile double [[TMP22]], double* [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[SVAR6]], align 4 -// CHECK3-NEXT: store i32 [[TMP23]], i32* [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load float, float* [[SFVAR7]], align 4 -// CHECK3-NEXT: store float [[TMP24]], float* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load double, double* [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP31]], double* [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP32:%.*]] = load double*, double** [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load double, double* [[TMP32]], align 4 +// CHECK3-NEXT: store volatile double [[TMP33]], double* [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP34]], i32* [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load float, float* [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP35]], float* [[TMP8]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca float*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -637,103 +681,107 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 -// CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store float* [[SFVAR]], float** [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load float*, float** [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store double* [[TMP1]], double** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load double*, double** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double*, double** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP11]], align 4 +// CHECK3-NEXT: store double* [[TMP8]], double** [[TMP]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load double*, double** [[TMP]], align 4 -// CHECK3-NEXT: store double* [[G13]], double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load double*, double** [[TMP]], align 4 +// CHECK3-NEXT: store double* [[G1]], double** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: store double 1.000000e+00, double* [[G2]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP15]], align 4 -// CHECK3-NEXT: store i32 3, i32* [[SVAR5]], align 4 -// CHECK3-NEXT: store float 4.000000e+00, float* [[SFVAR6]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double* [[G2]], double** [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP18:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: store double* [[TMP18]], double** [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store i32* [[SVAR5]], i32** [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store float* [[SFVAR6]], float** [[TMP20]], align 4 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) +// CHECK3-NEXT: store double 1.000000e+00, double* [[G]], align 8 +// CHECK3-NEXT: [[TMP24:%.*]] = load double*, double** [[_TMP2]], align 4 +// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP24]], align 4 +// CHECK3-NEXT: store i32 3, i32* [[SVAR]], align 4 +// CHECK3-NEXT: store float 4.000000e+00, float* [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G]], double** [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load double*, double** [[_TMP2]], align 4 +// CHECK3-NEXT: store double* [[TMP27]], double** [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SVAR]], i32** [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[SFVAR]], float** [[TMP29]], align 4 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK3-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP24:%.*]] = load double, double* [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP24]], double* [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP25:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load double, double* [[TMP25]], align 4 -// CHECK3-NEXT: store volatile double [[TMP26]], double* [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP27]], i32* [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load float, float* [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP28]], float* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load double, double* [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP33]], double* [[TMP6]], align 8 +// CHECK3-NEXT: [[TMP34:%.*]] = load double*, double** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load double, double* [[TMP34]], align 4 +// CHECK3-NEXT: store volatile double [[TMP35]], double* [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP36]], i32* [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load float, float* [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP37]], float* [[TMP12]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -888,6 +936,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 @@ -899,21 +948,27 @@ // CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK9-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]], [2 x i32]* [[TMP0]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP3]], i32* [[CONV1]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S* [[TMP7]], %struct.S** [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP8]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -922,33 +977,38 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca %struct.S*, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: store %struct.S* [[TMP5]], %struct.S** [[_TMP1]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK9-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S* [[TMP11]], %struct.S** [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -958,112 +1018,122 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP6:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store %struct.S* [[VAR6]], %struct.S** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i32*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], [2 x i32]* [[VEC4]], i32* [[T_VAR3]], [2 x %struct.S]* [[S_ARR5]], %struct.S* [[TMP18]], i32* [[SVAR8]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP30:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 8 +// CHECK9-NEXT: store %struct.S* [[TMP30]], %struct.S** [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: store i32* [[SVAR]], i32** [[TMP31]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP25]], i32* [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK9-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP28:%.*]] = bitcast [2 x %struct.S]* [[S_ARR5]] to %struct.S* -// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN10]], [[TMP29]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP38]], i32* [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP39:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK9-NEXT: [[TMP40:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP39]], i8* align 4 [[TMP40]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP41:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK9-NEXT: [[TMP42:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN5]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN5]], [[TMP42]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP28]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[TMP30:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK9-NEXT: [[TMP31:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP41]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[TMP43:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK9-NEXT: [[TMP44:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP43]], i8* align 4 [[TMP44]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP29]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done11: -// CHECK9-NEXT: [[TMP32:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[TMP6]] to i8* -// CHECK9-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[TMP32]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK9-NEXT: store i32 [[TMP35]], i32* [[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP42]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done6: +// CHECK9-NEXT: [[TMP45:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP46:%.*]] = bitcast %struct.S* [[TMP12]] to i8* +// CHECK9-NEXT: [[TMP47:%.*]] = bitcast %struct.S* [[TMP45]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP46]], i8* align 4 [[TMP47]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP48:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP48]], i32* [[TMP10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN12]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP36]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP49]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done13: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done8: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1071,39 +1141,43 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca %struct.S*, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32*, i32** [[TMP13]], align 8 +// CHECK9-NEXT: store %struct.S* [[TMP12]], %struct.S** [[TMP]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -1113,108 +1187,108 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store %struct.S* [[VAR6]], %struct.S** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK9-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX11]] to i8* -// CHECK9-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[TMP18]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP26]], i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: [[TMP30:%.*]] = bitcast %struct.S* [[ARRAYIDX6]] to i8* +// CHECK9-NEXT: [[TMP31:%.*]] = bitcast %struct.S* [[TMP28]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK9-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP27]], i32* [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK9-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP30:%.*]] = bitcast [2 x %struct.S]* [[S_ARR5]] to %struct.S* -// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN13]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN13]], [[TMP31]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP37]], i32* [[TMP8]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK9-NEXT: [[TMP39:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP10]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP40:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN8]], [[TMP41]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP30]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN13]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK9-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP40]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[TMP42:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK9-NEXT: [[TMP43:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP42]], i8* align 4 [[TMP43]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP31]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done14: -// CHECK9-NEXT: [[TMP34:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[TMP7]] to i8* -// CHECK9-NEXT: [[TMP36:%.*]] = bitcast %struct.S* [[TMP34]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK9-NEXT: store i32 [[TMP37]], i32* [[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP41]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP44:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP45:%.*]] = bitcast %struct.S* [[TMP17]] to i8* +// CHECK9-NEXT: [[TMP46:%.*]] = bitcast %struct.S* [[TMP44]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP45]], i8* align 4 [[TMP46]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP47]], i32* [[TMP14]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN15:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN15]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP38]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP48]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN15]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE16:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done16: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -1232,35 +1306,35 @@ // CHECK9-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x i8*], align 8 // CHECK9-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x i8*], align 8 // CHECK9-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x i8*], align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) -// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) -// CHECK9-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[TMP1]], %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) +// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) +// CHECK9-NEXT: store %struct.S.1* [[TEST]], %struct.S.1** [[VAR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[TMP1]], %struct.S.1** [[TMP]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_VAR]], align 4 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64* // CHECK9-NEXT: store i64 [[TMP3]], i64* [[TMP8]], align 8 @@ -1278,19 +1352,19 @@ // CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 // CHECK9-NEXT: store i8* null, i8** [[TMP16]], align 8 // CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK9-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.0]** -// CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP18]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.1]** +// CHECK9-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP18]], align 8 // CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK9-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to [2 x %struct.S.0]** -// CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP20]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to [2 x %struct.S.1]** +// CHECK9-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP20]], align 8 // CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 // CHECK9-NEXT: store i8* null, i8** [[TMP21]], align 8 // CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK9-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.0** -// CHECK9-NEXT: store %struct.S.0* [[TMP5]], %struct.S.0** [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.1** +// CHECK9-NEXT: store %struct.S.1* [[TMP5]], %struct.S.1** [[TMP23]], align 8 // CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK9-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to %struct.S.0** -// CHECK9-NEXT: store %struct.S.0* [[TMP6]], %struct.S.0** [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to %struct.S.1** +// CHECK9-NEXT: store %struct.S.1* [[TMP6]], %struct.S.1** [[TMP25]], align 8 // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 // CHECK9-NEXT: store i8* null, i8** [[TMP26]], align 8 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 @@ -1300,21 +1374,21 @@ // CHECK9-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 // CHECK9-NEXT: br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: -// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49(i64 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP4]]) #[[ATTR4]] +// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49(i64 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.1]* [[S_ARR]], %struct.S.1* [[TMP4]]) #[[ATTR4]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: // CHECK9-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK9: arraydestroy.done2: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK9-NEXT: ret i32 [[TMP32]] // @@ -1354,393 +1428,417 @@ // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK9-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) +// CHECK9-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49 -// CHECK9-SAME: (i64 noundef [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i64 noundef [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.1]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.1]*, align 8 +// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 +// CHECK9-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[S_ARR_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[VAR_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[CONV]], [2 x i32]* [[TMP0]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP3]]) +// CHECK9-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[S_ARR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[TMP2]], %struct.S.1** [[TMP]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [2 x %struct.S.1]* [[TMP1]], [2 x %struct.S.1]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[TMP7]], %struct.S.1** [[TMP6]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[_TMP1]], align 8 +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP7]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[TMP8]], %struct.S.1** [[TMP]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[TMP9]], %struct.S.1** [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: -// CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP1]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store %struct.S.0* [[VAR6]], %struct.S.0** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP1]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[TMP17:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i32*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP14]], i64 [[TMP16]], [2 x i32]* [[VEC4]], i32* [[T_VAR3]], [2 x %struct.S.0]* [[S_ARR5]], %struct.S.0* [[TMP17]]) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK9-NEXT: store i64 [[TMP19]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP22]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP28:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP3]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[TMP28]], %struct.S.1** [[TMP27]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP24]], i32* [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK9-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP25]], i8* align 4 [[TMP26]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP27:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR5]] to %struct.S.0* -// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN9]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN9]], [[TMP28]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP35]], i32* [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK9-NEXT: [[TMP37:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP36]], i8* align 4 [[TMP37]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP38:%.*]] = bitcast [2 x %struct.S.1]* [[S_ARR]] to %struct.S.1* +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN5]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN5]], [[TMP39]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP27]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[TMP29:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK9-NEXT: [[TMP30:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP29]], i8* align 4 [[TMP30]], i64 4, i1 false) -// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP28]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done10: -// CHECK9-NEXT: [[TMP31:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[TMP5]] to i8* -// CHECK9-NEXT: [[TMP33:%.*]] = bitcast %struct.S.0* [[TMP31]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP38]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[TMP40:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK9-NEXT: [[TMP41:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP40]], i8* align 4 [[TMP41]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP39]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done6: +// CHECK9-NEXT: [[TMP42:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP43:%.*]] = bitcast %struct.S.1* [[TMP10]] to i8* +// CHECK9-NEXT: [[TMP44:%.*]] = bitcast %struct.S.1* [[TMP42]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP43]], i8* align 4 [[TMP44]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN11]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN7]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP34]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done12: +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP45]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done8: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP11]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[TMP12]], %struct.S.1** [[TMP]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: -// CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP6:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store %struct.S.0* [[VAR6]], %struct.S.0** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i64 0, i64 [[IDXPROM9]] -// CHECK9-NEXT: [[TMP19:%.*]] = bitcast %struct.S.0* [[ARRAYIDX10]] to i8* -// CHECK9-NEXT: [[TMP20:%.*]] = bitcast %struct.S.0* [[TMP17]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP24]], i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: [[TMP28:%.*]] = bitcast %struct.S.1* [[ARRAYIDX6]] to i8* +// CHECK9-NEXT: [[TMP29:%.*]] = bitcast %struct.S.1* [[TMP26]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK9-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK9-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP26]], i32* [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK9-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP27]], i8* align 4 [[TMP28]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP29:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR5]] to %struct.S.0* -// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN12]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN12]], [[TMP30]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP35]], i32* [[TMP8]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK9-NEXT: [[TMP37:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP36]], i8* align 4 [[TMP37]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP10]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP38:%.*]] = bitcast [2 x %struct.S.1]* [[S_ARR]] to %struct.S.1* +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN8]], [[TMP39]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP29]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[TMP31:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK9-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP31]], i8* align 4 [[TMP32]], i64 4, i1 false) -// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP30]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done13: -// CHECK9-NEXT: [[TMP33:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP34:%.*]] = bitcast %struct.S.0* [[TMP6]] to i8* -// CHECK9-NEXT: [[TMP35:%.*]] = bitcast %struct.S.0* [[TMP33]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP38]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[TMP40:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK9-NEXT: [[TMP41:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP40]], i8* align 4 [[TMP41]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP39]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP42:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP43:%.*]] = bitcast %struct.S.1* [[TMP15]] to i8* +// CHECK9-NEXT: [[TMP44:%.*]] = bitcast %struct.S.1* [[TMP42]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP43]], i8* align 4 [[TMP44]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN14]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP36]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done15: +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP45]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: store i32 0, i32* [[F]], align 4 // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1891,6 +1989,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK11-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 @@ -1900,21 +1999,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 // CHECK11-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[T_VAR_ADDR]], [2 x i32]* [[TMP0]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP3]], i32* [[SVAR_ADDR]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[T_VAR_ADDR]], i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP7]], %struct.S** [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32* [[SVAR_ADDR]], i32** [[TMP8]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1923,33 +2028,38 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP7:%.*]] = alloca %struct.S*, align 4 -// CHECK11-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca %struct.S*, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: store %struct.S* [[TMP5]], %struct.S** [[_TMP1]], align 4 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP11]], %struct.S** [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1959,110 +2069,120 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP6:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK11-NEXT: store %struct.S* [[VAR6]], %struct.S** [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], [2 x i32]* [[VEC4]], i32* [[T_VAR3]], [2 x %struct.S]* [[S_ARR5]], %struct.S* [[TMP16]], i32* [[SVAR8]]) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP28:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP28]], %struct.S** [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store i32* [[SVAR]], i32** [[TMP29]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK11-NEXT: store i32 [[TMP23]], i32* [[TMP0]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK11-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP24]], i8* align 4 [[TMP25]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP26:%.*]] = bitcast [2 x %struct.S]* [[S_ARR5]] to %struct.S* -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN10]], [[TMP27]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP36]], i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK11-NEXT: [[TMP38:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP37]], i8* align 4 [[TMP38]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP39:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN5]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN5]], [[TMP40]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP26]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[TMP28:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK11-NEXT: [[TMP29:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP39]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[TMP41:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK11-NEXT: [[TMP42:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP41]], i8* align 4 [[TMP42]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done11: -// CHECK11-NEXT: [[TMP30:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = bitcast %struct.S* [[TMP6]] to i8* -// CHECK11-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[TMP30]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP31]], i8* align 4 [[TMP32]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK11-NEXT: store i32 [[TMP33]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP40]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done6: +// CHECK11-NEXT: [[TMP43:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = bitcast %struct.S* [[TMP12]] to i8* +// CHECK11-NEXT: [[TMP45:%.*]] = bitcast %struct.S* [[TMP43]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP44]], i8* align 4 [[TMP45]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP46]], i32* [[TMP10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP34]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP47]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done8: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2070,37 +2190,41 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca %struct.S*, align 4 -// CHECK11-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca %struct.S*, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[TMP13]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP12]], %struct.S** [[TMP]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -2110,106 +2234,106 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store %struct.S* [[VAR5]], %struct.S** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC3]], i32 0, i32 [[TMP17]] -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 [[TMP19]] -// CHECK11-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX9]] to i8* -// CHECK11-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[TMP18]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP27]] +// CHECK11-NEXT: store i32 [[TMP26]], i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP29]] +// CHECK11-NEXT: [[TMP30:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* +// CHECK11-NEXT: [[TMP31:%.*]] = bitcast %struct.S* [[TMP28]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK11-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK11-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP27]], i32* [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK11-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP30:%.*]] = bitcast [2 x %struct.S]* [[S_ARR4]] to %struct.S* -// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN11]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN11]], [[TMP31]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP37]], i32* [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK11-NEXT: [[TMP39:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP10]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP40:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN6]], [[TMP41]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP30]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK11-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP40]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[TMP42:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK11-NEXT: [[TMP43:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP42]], i8* align 4 [[TMP43]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP31]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done12: -// CHECK11-NEXT: [[TMP34:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[TMP7]] to i8* -// CHECK11-NEXT: [[TMP36:%.*]] = bitcast %struct.S* [[TMP34]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[SVAR7]], align 4 -// CHECK11-NEXT: store i32 [[TMP37]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP41]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP44:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP45:%.*]] = bitcast %struct.S* [[TMP17]] to i8* +// CHECK11-NEXT: [[TMP46:%.*]] = bitcast %struct.S* [[TMP44]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP45]], i8* align 4 [[TMP46]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP47:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP47]], i32* [[TMP14]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP38]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP48]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // @@ -2227,34 +2351,34 @@ // CHECK11-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x i8*], align 4 // CHECK11-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x i8*], align 4 // CHECK11-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x i8*], align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i32 8, i1 false) -// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i32 1 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) -// CHECK11-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[TMP1]], %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i32 1 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK11-NEXT: store %struct.S.1* [[TEST]], %struct.S.1** [[VAR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[TMP1]], %struct.S.1** [[TMP]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_VAR]], align 4 // CHECK11-NEXT: store i32 [[TMP2]], i32* [[T_VAR_CASTED]], align 4 // CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32* // CHECK11-NEXT: store i32 [[TMP3]], i32* [[TMP8]], align 4 @@ -2272,19 +2396,19 @@ // CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 // CHECK11-NEXT: store i8* null, i8** [[TMP16]], align 4 // CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.0]** -// CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP18]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.1]** +// CHECK11-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP18]], align 4 // CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to [2 x %struct.S.0]** -// CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP20]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to [2 x %struct.S.1]** +// CHECK11-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP20]], align 4 // CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 // CHECK11-NEXT: store i8* null, i8** [[TMP21]], align 4 // CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK11-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.0** -// CHECK11-NEXT: store %struct.S.0* [[TMP5]], %struct.S.0** [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.1** +// CHECK11-NEXT: store %struct.S.1* [[TMP5]], %struct.S.1** [[TMP23]], align 4 // CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK11-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to %struct.S.0** -// CHECK11-NEXT: store %struct.S.0* [[TMP6]], %struct.S.0** [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to %struct.S.1** +// CHECK11-NEXT: store %struct.S.1* [[TMP6]], %struct.S.1** [[TMP25]], align 4 // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 // CHECK11-NEXT: store i8* null, i8** [[TMP26]], align 4 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 @@ -2294,21 +2418,21 @@ // CHECK11-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 // CHECK11-NEXT: br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: -// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49(i32 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP4]]) #[[ATTR4]] +// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49(i32 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.1]* [[S_ARR]], %struct.S.1* [[TMP4]]) #[[ATTR4]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: // CHECK11-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK11: arraydestroy.done2: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK11-NEXT: ret i32 [[TMP32]] // @@ -2348,386 +2472,410 @@ // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK11-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49 -// CHECK11-SAME: (i32 noundef [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32 noundef [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.1]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.1]*, align 4 +// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 +// CHECK11-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[S_ARR_ADDR]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[VAR_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[T_VAR_ADDR]], [2 x i32]* [[TMP0]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP3]]) +// CHECK11-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[S_ARR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR_ADDR]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[TMP2]], %struct.S.1** [[TMP]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[T_VAR_ADDR]], i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x %struct.S.1]* [[TMP1]], [2 x %struct.S.1]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[TMP7]], %struct.S.1** [[TMP6]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[_TMP1]], align 4 +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP7]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[TMP8]], %struct.S.1** [[TMP]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[TMP9]], %struct.S.1** [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: -// CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i32 1 -// CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP1]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK11-NEXT: store %struct.S.0* [[VAR6]], %struct.S.0** [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP1]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP13]], i32 [[TMP14]], [2 x i32]* [[VEC4]], i32* [[T_VAR3]], [2 x %struct.S.0]* [[S_ARR5]], %struct.S.0* [[TMP15]]) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP20]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP21]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP26:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP3]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[TMP26]], %struct.S.1** [[TMP25]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK11-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK11-NEXT: store i32 [[TMP22]], i32* [[TMP0]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK11-NEXT: [[TMP24:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP23]], i8* align 4 [[TMP24]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP25:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR5]] to %struct.S.0* -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN9]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN9]], [[TMP26]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP33]], i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK11-NEXT: [[TMP35:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP36:%.*]] = bitcast [2 x %struct.S.1]* [[S_ARR]] to %struct.S.1* +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN5]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN5]], [[TMP37]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP25]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[TMP27:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK11-NEXT: [[TMP28:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP27]], i8* align 4 [[TMP28]], i32 4, i1 false) -// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP26]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done10: -// CHECK11-NEXT: [[TMP29:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = bitcast %struct.S.0* [[TMP5]] to i8* -// CHECK11-NEXT: [[TMP31:%.*]] = bitcast %struct.S.0* [[TMP29]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP36]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[TMP38:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK11-NEXT: [[TMP39:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP37]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done6: +// CHECK11-NEXT: [[TMP40:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = bitcast %struct.S.1* [[TMP10]] to i8* +// CHECK11-NEXT: [[TMP42:%.*]] = bitcast %struct.S.1* [[TMP40]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP41]], i8* align 4 [[TMP42]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN11]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN7]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP32]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done12: +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP43]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done8: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP11]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[TMP12]], %struct.S.1** [[TMP]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: -// CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i32 1 -// CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP6:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC3]], i32 0, i32 [[TMP16]] -// CHECK11-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 [[TMP18]] -// CHECK11-NEXT: [[TMP19:%.*]] = bitcast %struct.S.0* [[ARRAYIDX8]] to i8* -// CHECK11-NEXT: [[TMP20:%.*]] = bitcast %struct.S.0* [[TMP17]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP25]] +// CHECK11-NEXT: store i32 [[TMP24]], i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 [[TMP27]] +// CHECK11-NEXT: [[TMP28:%.*]] = bitcast %struct.S.1* [[ARRAYIDX4]] to i8* +// CHECK11-NEXT: [[TMP29:%.*]] = bitcast %struct.S.1* [[TMP26]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK11-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP26]], i32* [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK11-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP27]], i8* align 4 [[TMP28]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP29:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR4]] to %struct.S.0* -// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN10]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN10]], [[TMP30]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP35]], i32* [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK11-NEXT: [[TMP37:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 [[TMP37]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP10]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP38:%.*]] = bitcast [2 x %struct.S.1]* [[S_ARR]] to %struct.S.1* +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN6]], [[TMP39]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP29]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[TMP31:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK11-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP31]], i8* align 4 [[TMP32]], i32 4, i1 false) -// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP30]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done11: -// CHECK11-NEXT: [[TMP33:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = bitcast %struct.S.0* [[TMP6]] to i8* -// CHECK11-NEXT: [[TMP35:%.*]] = bitcast %struct.S.0* [[TMP33]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP38]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[TMP40:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK11-NEXT: [[TMP41:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP40]], i8* align 4 [[TMP41]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP39]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP42:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = bitcast %struct.S.1* [[TMP15]] to i8* +// CHECK11-NEXT: [[TMP44:%.*]] = bitcast %struct.S.1* [[TMP42]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP43]], i8* align 4 [[TMP44]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP36]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP45]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: store i32 0, i32* [[F]], align 4 // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp @@ -203,78 +203,90 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK1-SAME: () #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 2) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 2) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -284,43 +296,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: invoke void @_Z3foov() @@ -330,20 +346,20 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR10:[0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR10:[0-9]+]] // CHECK1-NEXT: unreachable // // @@ -358,85 +374,98 @@ // CHECK1-SAME: (i64 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i8* [[CONV]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i8* [[CONV]], i8** [[TMP0]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i8* [[A]], i8** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i8*, i8** [[A_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP0]], align 1 -// CHECK1-NEXT: [[TMP9:%.*]] = sext i8 [[TMP8]] to i32 -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 [[TMP9]]) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i8, i8* [[TMP2]], align 1 +// CHECK1-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP11]], i64 [[TMP13]]) +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP17]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -446,43 +475,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: invoke void @_Z3foov() @@ -492,20 +525,20 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR10]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR10]] // CHECK1-NEXT: unreachable // // @@ -593,78 +626,90 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 5) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 5) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -674,43 +719,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: invoke void @_Z3foov() @@ -720,98 +769,110 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR10]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR10]] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 23) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 23) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -821,43 +882,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: invoke void @_Z3foov() @@ -867,98 +932,110 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR10]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR10]] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -968,43 +1045,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: invoke void @_Z3foov() @@ -1014,70 +1095,77 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR10]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR10]] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: // CHECK1-NEXT: invoke void @_ZN1SC1El(%struct.S* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 noundef 23) @@ -1086,41 +1174,46 @@ // CHECK1-NEXT: [[CALL:%.*]] = invoke noundef signext i8 @_ZN1ScvcEv(%struct.S* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK1-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[TERMINATE_LPAD]] // CHECK1: invoke.cont2: -// CHECK1-NEXT: [[TMP7:%.*]] = sext i8 [[CALL]] to i32 -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP7]]) +// CHECK1-NEXT: [[TMP8:%.*]] = sext i8 [[CALL]] to i32 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 [[TMP8]]) // CHECK1-NEXT: call void @_ZN1SD1Ev(%struct.S* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK1-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i64 [[TMP12]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP14]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP14:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP17:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP15:%.*]] = extractvalue { i8*, i32 } [[TMP14]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP15]]) #[[ATTR10]] +// CHECK1-NEXT: [[TMP18:%.*]] = extractvalue { i8*, i32 } [[TMP17]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP18]]) #[[ATTR10]] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1130,43 +1223,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: invoke void @_Z3foov() @@ -1176,20 +1273,20 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR10]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR10]] // CHECK1-NEXT: unreachable // // @@ -1310,78 +1407,90 @@ // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK5-SAME: () #[[ATTR3:[0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 2) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 2) +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1391,43 +1500,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -1437,20 +1550,20 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR10:[0-9]+]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR10:[0-9]+]] // CHECK5-NEXT: unreachable // // @@ -1465,85 +1578,98 @@ // CHECK5-SAME: (i64 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i8* [[CONV]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i8* [[CONV]], i8** [[TMP0]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i8*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i8* [[A]], i8** [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i8*, i8** [[A_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i8*, i8** [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP0]], align 1 -// CHECK5-NEXT: [[TMP9:%.*]] = sext i8 [[TMP8]] to i32 -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 [[TMP9]]) -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i8, i8* [[TMP2]], align 1 +// CHECK5-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]) +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP11]], i64 [[TMP13]]) +// CHECK5-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK5-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP17]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1553,43 +1679,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -1599,20 +1729,20 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR10]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR10]] // CHECK5-NEXT: unreachable // // @@ -1691,78 +1821,90 @@ // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 5) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 5) +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1772,43 +1914,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -1818,98 +1964,110 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR10]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR10]] // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*)) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 23) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 23) +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1919,43 +2077,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -1965,98 +2127,110 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR10]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR10]] // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*)) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 1) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1) +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2066,43 +2240,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -2112,70 +2290,77 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR10]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR10]] // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*)) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: // CHECK5-NEXT: invoke void @_ZN1SC1El(%struct.S* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 noundef 23) @@ -2184,41 +2369,46 @@ // CHECK5-NEXT: [[CALL:%.*]] = invoke noundef signext i8 @_ZN1ScvcEv(%struct.S* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK5-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[TERMINATE_LPAD]] // CHECK5: invoke.cont2: -// CHECK5-NEXT: [[TMP7:%.*]] = sext i8 [[CALL]] to i32 -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP7]]) +// CHECK5-NEXT: [[TMP8:%.*]] = sext i8 [[CALL]] to i32 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 [[TMP8]]) // CHECK5-NEXT: call void @_ZN1SD1Ev(%struct.S* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK5-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK5-NEXT: store i64 [[TMP12]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP13]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP14]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP14:%.*]] = landingpad { i8*, i32 } +// CHECK5-NEXT: [[TMP17:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null -// CHECK5-NEXT: [[TMP15:%.*]] = extractvalue { i8*, i32 } [[TMP14]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP15]]) #[[ATTR10]] +// CHECK5-NEXT: [[TMP18:%.*]] = extractvalue { i8*, i32 } [[TMP17]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP18]]) #[[ATTR10]] // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2228,43 +2418,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -2274,20 +2468,20 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR10]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR10]] // CHECK5-NEXT: unreachable // // @@ -2417,78 +2611,90 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK9-SAME: () #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 2) -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 2) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2498,43 +2704,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK9-NEXT: invoke void @_Z3foov() @@ -2544,20 +2754,20 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK9-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK9-NEXT: catch i8* null -// CHECK9-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR10:[0-9]+]] +// CHECK9-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR10:[0-9]+]] // CHECK9-NEXT: unreachable // // @@ -2572,85 +2782,98 @@ // CHECK9-SAME: (i64 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i8* [[CONV]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i8* [[CONV]], i8** [[TMP0]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i8*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i8* [[A]], i8** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i8*, i8** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i8*, i8** [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP0]], align 1 -// CHECK9-NEXT: [[TMP9:%.*]] = sext i8 [[TMP8]] to i32 -// CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 [[TMP9]]) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i8, i8* [[TMP2]], align 1 +// CHECK9-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP11]], i64 [[TMP13]]) +// CHECK9-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK9-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP17]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2660,43 +2883,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK9-NEXT: invoke void @_Z3foov() @@ -2706,20 +2933,20 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK9-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK9-NEXT: catch i8* null -// CHECK9-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR10]] +// CHECK9-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR10]] // CHECK9-NEXT: unreachable // // @@ -2807,78 +3034,90 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 5) -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 5) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2888,43 +3127,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK9-NEXT: invoke void @_Z3foov() @@ -2934,98 +3177,110 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK9-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK9-NEXT: catch i8* null -// CHECK9-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR10]] +// CHECK9-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR10]] // CHECK9-NEXT: unreachable // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 23) -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 23) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3035,43 +3290,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK9-NEXT: invoke void @_Z3foov() @@ -3081,98 +3340,110 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK9-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK9-NEXT: catch i8* null -// CHECK9-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR10]] +// CHECK9-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR10]] // CHECK9-NEXT: unreachable // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 1) -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3182,43 +3453,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK9-NEXT: invoke void @_Z3foov() @@ -3228,70 +3503,77 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK9-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK9-NEXT: catch i8* null -// CHECK9-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR10]] +// CHECK9-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR10]] // CHECK9-NEXT: unreachable // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: // CHECK9-NEXT: invoke void @_ZN1SC1El(%struct.S* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 noundef 23) @@ -3300,41 +3582,46 @@ // CHECK9-NEXT: [[CALL:%.*]] = invoke noundef i8 @_ZN1ScvcEv(%struct.S* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK9-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[TERMINATE_LPAD]] // CHECK9: invoke.cont2: -// CHECK9-NEXT: [[TMP7:%.*]] = sext i8 [[CALL]] to i32 -// CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP7]]) +// CHECK9-NEXT: [[TMP8:%.*]] = sext i8 [[CALL]] to i32 +// CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 [[TMP8]]) // CHECK9-NEXT: call void @_ZN1SD1Ev(%struct.S* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK9-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK9-NEXT: store i64 [[TMP12]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP14]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP14:%.*]] = landingpad { i8*, i32 } +// CHECK9-NEXT: [[TMP17:%.*]] = landingpad { i8*, i32 } // CHECK9-NEXT: catch i8* null -// CHECK9-NEXT: [[TMP15:%.*]] = extractvalue { i8*, i32 } [[TMP14]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(i8* [[TMP15]]) #[[ATTR10]] +// CHECK9-NEXT: [[TMP18:%.*]] = extractvalue { i8*, i32 } [[TMP17]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(i8* [[TMP18]]) #[[ATTR10]] // CHECK9-NEXT: unreachable // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3344,43 +3631,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK9-NEXT: invoke void @_Z3foov() @@ -3390,20 +3681,20 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK9-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK9-NEXT: catch i8* null -// CHECK9-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR10]] +// CHECK9-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR10]] // CHECK9-NEXT: unreachable // // @@ -3524,78 +3815,90 @@ // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK13-SAME: () #[[ATTR3:[0-9]+]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 2) -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK13-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 2) +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3605,43 +3908,47 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK13-NEXT: invoke void @_Z3foov() @@ -3651,20 +3958,20 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK13-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK13-NEXT: catch i8* null -// CHECK13-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR10:[0-9]+]] +// CHECK13-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR10:[0-9]+]] // CHECK13-NEXT: unreachable // // @@ -3679,85 +3986,98 @@ // CHECK13-SAME: (i64 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK13-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i8* [[CONV]]) +// CHECK13-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i8* [[CONV]], i8** [[TMP0]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR3]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i8*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i8* [[A]], i8** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i8*, i8** [[A_ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i8*, i8** [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP0]], align 1 -// CHECK13-NEXT: [[TMP9:%.*]] = sext i8 [[TMP8]] to i32 -// CHECK13-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 [[TMP9]]) -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i8, i8* [[TMP2]], align 1 +// CHECK13-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK13-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]) +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP11]], i64 [[TMP13]]) +// CHECK13-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK13-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP16]], align 8 +// CHECK13-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP17]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3767,43 +4087,47 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK13-NEXT: invoke void @_Z3foov() @@ -3813,20 +4137,20 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK13-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK13-NEXT: catch i8* null -// CHECK13-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR10]] +// CHECK13-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR10]] // CHECK13-NEXT: unreachable // // @@ -3905,78 +4229,90 @@ // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52 // CHECK13-SAME: () #[[ATTR3]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 5) -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK13-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 5) +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3986,43 +4322,47 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK13-NEXT: invoke void @_Z3foov() @@ -4032,98 +4372,110 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK13-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK13-NEXT: catch i8* null -// CHECK13-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR10]] +// CHECK13-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR10]] // CHECK13-NEXT: unreachable // // // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57 // CHECK13-SAME: () #[[ATTR3]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*)) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 23) -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK13-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 23) +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4133,43 +4485,47 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK13-NEXT: invoke void @_Z3foov() @@ -4179,98 +4535,110 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK13-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK13-NEXT: catch i8* null -// CHECK13-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR10]] +// CHECK13-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR10]] // CHECK13-NEXT: unreachable // // // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52 // CHECK13-SAME: () #[[ATTR3]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*)) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 1) -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK13-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1) +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4280,43 +4648,47 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK13-NEXT: invoke void @_Z3foov() @@ -4326,70 +4698,77 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK13-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK13-NEXT: catch i8* null -// CHECK13-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR10]] +// CHECK13-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR10]] // CHECK13-NEXT: unreachable // // // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57 // CHECK13-SAME: () #[[ATTR3]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*)) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: // CHECK13-NEXT: invoke void @_ZN1SC1El(%struct.S* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 noundef 23) @@ -4398,41 +4777,46 @@ // CHECK13-NEXT: [[CALL:%.*]] = invoke noundef i8 @_ZN1ScvcEv(%struct.S* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK13-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[TERMINATE_LPAD]] // CHECK13: invoke.cont2: -// CHECK13-NEXT: [[TMP7:%.*]] = sext i8 [[CALL]] to i32 -// CHECK13-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP7]]) +// CHECK13-NEXT: [[TMP8:%.*]] = sext i8 [[CALL]] to i32 +// CHECK13-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 [[TMP8]]) // CHECK13-NEXT: call void @_ZN1SD1Ev(%struct.S* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]] -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK13-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK13-NEXT: store i64 [[TMP12]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP13]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP14]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP14:%.*]] = landingpad { i8*, i32 } +// CHECK13-NEXT: [[TMP17:%.*]] = landingpad { i8*, i32 } // CHECK13-NEXT: catch i8* null -// CHECK13-NEXT: [[TMP15:%.*]] = extractvalue { i8*, i32 } [[TMP14]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(i8* [[TMP15]]) #[[ATTR10]] +// CHECK13-NEXT: [[TMP18:%.*]] = extractvalue { i8*, i32 } [[TMP17]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(i8* [[TMP18]]) #[[ATTR10]] // CHECK13-NEXT: unreachable // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4442,43 +4826,47 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK13-NEXT: invoke void @_Z3foov() @@ -4488,20 +4876,20 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK13-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK13-NEXT: catch i8* null -// CHECK13-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR10]] +// CHECK13-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR10]] // CHECK13-NEXT: unreachable // // diff --git a/clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp @@ -151,15 +151,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK1-SAME: () #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca double*, align 8 @@ -167,69 +169,79 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store double* undef, double** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: store double* [[G1]], double** [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca double*, align 8 @@ -243,77 +255,81 @@ // CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store double* undef, double** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: store double* [[G1]], double** [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: store double 1.000000e+00, double* [[G]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load double*, double** [[_TMP3]], align 8 -// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load double*, double** [[_TMP3]], align 8 +// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP15]], align 8 // CHECK1-NEXT: store i32 3, i32* [[SVAR]], align 4 // CHECK1-NEXT: store float 4.000000e+00, float* [[SFVAR]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double* [[G]], double** [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load double*, double** [[_TMP3]], align 8 -// CHECK1-NEXT: store double* [[TMP13]], double** [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store i32* [[SVAR]], i32** [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store float* [[SFVAR]], float** [[TMP15]], align 8 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store double* [[G]], double** [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load double*, double** [[_TMP3]], align 8 +// CHECK1-NEXT: store double* [[TMP18]], double** [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[SVAR]], i32** [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[SFVAR]], float** [[TMP20]], align 8 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -340,15 +356,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK3-SAME: () #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca double*, align 4 @@ -356,67 +374,77 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store double* undef, double** [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: store double* [[G1]], double** [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP11]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca double*, align 4 @@ -430,75 +458,79 @@ // CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK3-NEXT: store double* undef, double** [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: store double* [[G1]], double** [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK3-NEXT: store double 1.000000e+00, double* [[G]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = load double*, double** [[_TMP2]], align 4 -// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load double*, double** [[_TMP2]], align 4 +// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP15]], align 4 // CHECK3-NEXT: store i32 3, i32* [[SVAR]], align 4 // CHECK3-NEXT: store float 4.000000e+00, float* [[SFVAR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double* [[G]], double** [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP13:%.*]] = load double*, double** [[_TMP2]], align 4 -// CHECK3-NEXT: store double* [[TMP13]], double** [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store i32* [[SVAR]], i32** [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store float* [[SFVAR]], float** [[TMP15]], align 4 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G]], double** [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load double*, double** [[_TMP2]], align 4 +// CHECK3-NEXT: store double* [[TMP18]], double** [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SVAR]], i32** [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[SFVAR]], float** [[TMP20]], align 4 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK3-NEXT: ret void // // @@ -585,15 +617,17 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l95 // CHECK9-SAME: () #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S*, align 8 @@ -601,6 +635,8 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 @@ -608,8 +644,11 @@ // CHECK9-NEXT: [[_TMP2:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store %struct.S* undef, %struct.S** [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 @@ -627,55 +666,61 @@ // CHECK9: arrayctor.cont: // CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN4]], i64 2 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN4]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] @@ -685,12 +730,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S*, align 8 @@ -707,15 +751,19 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK9-NEXT: store %struct.S* undef, %struct.S** [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 @@ -732,67 +780,67 @@ // CHECK9: arrayctor.cont: // CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP3]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP18]] to i64 // CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] -// CHECK9-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[ARRAYIDX6]] to i8* -// CHECK9-NEXT: [[TMP15:%.*]] = bitcast %struct.S* [[TMP12]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[ARRAYIDX6]] to i8* +// CHECK9-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[TMP17]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP19]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP24]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] @@ -815,23 +863,23 @@ // CHECK9-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) -// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) -// CHECK9-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 8 -// CHECK9-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 8 +// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) +// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) +// CHECK9-NEXT: store %struct.S.1* [[TEST]], %struct.S.1** [[VAR]], align 8 +// CHECK9-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 8 // CHECK9-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 2) // CHECK9-NEXT: [[TMP1:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0) // CHECK9-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 @@ -841,17 +889,17 @@ // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: // CHECK9-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK9: arraydestroy.done2: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK9-NEXT: ret i32 [[TMP4]] // @@ -891,286 +939,302 @@ // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK9-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) +// CHECK9-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP2:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK9-NEXT: [[_TMP2:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 8 +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: -// CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]]) -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN4]], i64 2 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN4]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK9: arraydestroy.done5: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 8 +// CHECK9-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: -// CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP3]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] -// CHECK9-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[ARRAYIDX6]] to i8* -// CHECK9-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i64 4, i1 false) +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: [[TMP19:%.*]] = bitcast %struct.S.1* [[ARRAYIDX6]] to i8* +// CHECK9-NEXT: [[TMP20:%.*]] = bitcast %struct.S.1* [[TMP17]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN8]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP19]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP24]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK9: arraydestroy.done9: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: store i32 0, i32* [[F]], align 4 // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1257,15 +1321,17 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l95 // CHECK11-SAME: () #[[ATTR3:[0-9]+]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S*, align 4 @@ -1273,6 +1339,8 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 @@ -1280,8 +1348,11 @@ // CHECK11-NEXT: [[_TMP2:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: store %struct.S* undef, %struct.S** [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 @@ -1299,53 +1370,59 @@ // CHECK11: arrayctor.cont: // CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP11]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]]) // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN4]], i32 2 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN4]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] @@ -1355,12 +1432,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S*, align 4 @@ -1377,15 +1453,19 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK11-NEXT: store %struct.S* undef, %struct.S** [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 @@ -1400,65 +1480,65 @@ // CHECK11: arrayctor.cont: // CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* -// CHECK11-NEXT: [[TMP15:%.*]] = bitcast %struct.S* [[TMP12]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP16]] +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP18]] +// CHECK11-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* +// CHECK11-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[TMP17]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK11-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP19]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP24]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -1481,23 +1561,23 @@ // CHECK11-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i32 8, i1 false) -// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i32 1 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) -// CHECK11-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 4 -// CHECK11-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 4 +// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i32 1 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK11-NEXT: store %struct.S.1* [[TEST]], %struct.S.1** [[VAR]], align 4 +// CHECK11-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 4 // CHECK11-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 2) // CHECK11-NEXT: [[TMP1:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0) // CHECK11-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 @@ -1507,17 +1587,17 @@ // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: // CHECK11-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK11: arraydestroy.done2: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK11-NEXT: ret i32 [[TMP4]] // @@ -1557,280 +1637,296 @@ // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK11-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49 // CHECK11-SAME: () #[[ATTR3]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP2:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 4 +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: -// CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i32 1 -// CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP11]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN4]], i32 2 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]]) +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN4]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK11: arraydestroy.done5: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP2:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 4 +// CHECK11-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: -// CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i32 1 -// CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[ARRAYIDX4]] to i8* -// CHECK11-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP16]] +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 [[TMP18]] +// CHECK11-NEXT: [[TMP19:%.*]] = bitcast %struct.S.1* [[ARRAYIDX4]] to i8* +// CHECK11-NEXT: [[TMP20:%.*]] = bitcast %struct.S.1* [[TMP17]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK11-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP19]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP24]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK11: arraydestroy.done7: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: store i32 0, i32* [[F]], align 4 // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp @@ -83,78 +83,90 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 4) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 4) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -164,135 +176,151 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 3) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 3) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -302,57 +330,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -374,78 +406,90 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l29 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 2) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 2) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -455,57 +499,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // diff --git a/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp @@ -58,369 +58,387 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 // CHECK1-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[ARGC_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[ARGC_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i8**)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]], i8** [[TMP2]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: store i8** [[TMP3]], i8*** [[TMP2]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[ARGC:%.*]], i8** noundef [[ARGV:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 // CHECK1-NEXT: store i64 0, i64* [[DOTOMP_COMB_LB]], align 8 // CHECK1-NEXT: store i64 9, i64* [[DOTOMP_COMB_UB]], align 8 // CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP2]], 9 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP6]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP8]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP9]], [[TMP10]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i8**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP7]], i64 [[TMP8]], i32* [[CONV]], i8** [[TMP9]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP12]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[ARGC]], i32** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP17:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: store i8** [[TMP17]], i8*** [[TMP16]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i64 [[ADD]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i8** noundef [[ARGV:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[_TMP5:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x i8*], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP28:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP27:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 // CHECK1-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 9, i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[TMP1]], i64* [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: store i64 [[TMP8]], i64* [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP3]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 0 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP6]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8*, i8** [[TMP7]], i64 9 -// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[TMP8]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint i8* [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint i8* [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: [[TMP12:%.*]] = sdiv exact i64 [[TMP11]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = add nuw i64 [[TMP12]], 1 -// CHECK1-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP15:%.*]] = call i8* @llvm.stacksave() -// CHECK1-NEXT: store i8* [[TMP15]], i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP13]], align 16 -// CHECK1-NEXT: store i64 [[TMP13]], i64* [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[VLA]], [[TMP16]] +// CHECK1-NEXT: store i32 0, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i8**, i8*** [[TMP7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP10]], i64 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP11]], i64 0 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP13]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i8**, i8*** [[TMP7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8*, i8** [[TMP14]], i64 9 +// CHECK1-NEXT: [[TMP15:%.*]] = load i8*, i8** [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP15]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP16:%.*]] = ptrtoint i8* [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint i8* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP18:%.*]] = sub i64 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: [[TMP19:%.*]] = sdiv exact i64 [[TMP18]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP20:%.*]] = add nuw i64 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP22:%.*]] = call i8* @llvm.stacksave() +// CHECK1-NEXT: store i8* [[TMP22]], i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP20]], align 16 +// CHECK1-NEXT: store i64 [[TMP20]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP20]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[VLA]], [[TMP23]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP16]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP23]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP17:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i8*, i8** [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = ptrtoint i8* [[TMP18]] to i64 -// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint i8* [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = sub i64 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: [[TMP22:%.*]] = sdiv exact i64 [[TMP21]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP22]] -// CHECK1-NEXT: store i8** [[_TMP6]], i8*** [[_TMP5]], align 8 -// CHECK1-NEXT: store i8* [[TMP23]], i8** [[_TMP6]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i8**, i8*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i8*, i8** [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = ptrtoint i8* [[TMP25]] to i64 +// CHECK1-NEXT: [[TMP27:%.*]] = ptrtoint i8* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP28:%.*]] = sub i64 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: [[TMP29:%.*]] = sdiv exact i64 [[TMP28]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP29]] +// CHECK1-NEXT: store i8** [[_TMP5]], i8*** [[_TMP4]], align 8 +// CHECK1-NEXT: store i8* [[TMP30]], i8** [[_TMP5]], align 8 // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP25:%.*]] = bitcast i32* [[ARGC1]] to i8* -// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, i64* [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init. to i8*), i8** [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store i8* null, i8** [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb. to i8*), i8** [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: [[TMP33:%.*]] = bitcast i32* [[TMP32]] to i8* -// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP33]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP35:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8*, i8** [[TMP35]], i64 0 -// CHECK1-NEXT: [[TMP36:%.*]] = load i8*, i8** [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, i8* [[TMP36]], i64 0 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = sext i32 [[TMP37]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP38]] -// CHECK1-NEXT: [[TMP39:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8*, i8** [[TMP39]], i64 9 -// CHECK1-NEXT: [[TMP40:%.*]] = load i8*, i8** [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, i8* [[TMP40]], i64 [[LB_ADD_LEN10]] -// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP34]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 1 -// CHECK1-NEXT: store i8* [[ARRAYIDX9]], i8** [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP42:%.*]] = ptrtoint i8* [[ARRAYIDX12]] to i64 -// CHECK1-NEXT: [[TMP43:%.*]] = ptrtoint i8* [[ARRAYIDX9]] to i64 -// CHECK1-NEXT: [[TMP44:%.*]] = sub i64 [[TMP42]], [[TMP43]] -// CHECK1-NEXT: [[TMP45:%.*]] = sdiv exact i64 [[TMP44]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP46:%.*]] = add nuw i64 [[TMP45]], 1 -// CHECK1-NEXT: [[TMP47:%.*]] = mul nuw i64 [[TMP46]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP47]], i64* [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 3 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init..2 to i8*), i8** [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 4 -// CHECK1-NEXT: store i8* null, i8** [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 5 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb..3 to i8*), i8** [[TMP51]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 6 -// CHECK1-NEXT: store i32 1, i32* [[TMP52]], align 8 -// CHECK1-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4 -// CHECK1-NEXT: [[TMP55:%.*]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]] to i8* -// CHECK1-NEXT: [[TMP56:%.*]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @[[GLOB2]], i32 [[TMP54]], i32 1, i32 2, i8* [[TMP55]]) -// CHECK1-NEXT: store i8* [[TMP56]], i8** [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP58:%.*]] = load i32, i32* [[TMP57]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP58]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP59:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP59]], 9 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP32:%.*]] = bitcast i32* [[ARGC]] to i8* +// CHECK1-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP6]] to i8* +// CHECK1-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, i64* [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init. to i8*), i8** [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store i8* null, i8** [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb. to i8*), i8** [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP40:%.*]] = bitcast i32* [[TMP39]] to i8* +// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP40]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP42:%.*]] = load i8**, i8*** [[TMP7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8*, i8** [[TMP42]], i64 0 +// CHECK1-NEXT: [[TMP43:%.*]] = load i8*, i8** [[ARRAYIDX7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, i8* [[TMP43]], i64 0 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = sext i32 [[TMP44]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP45]] +// CHECK1-NEXT: [[TMP46:%.*]] = load i8**, i8*** [[TMP7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i8*, i8** [[TMP46]], i64 9 +// CHECK1-NEXT: [[TMP47:%.*]] = load i8*, i8** [[ARRAYIDX10]], align 8 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, i8* [[TMP47]], i64 [[LB_ADD_LEN9]] +// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP41]], align 8 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 +// CHECK1-NEXT: store i8* [[ARRAYIDX8]], i8** [[TMP48]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = ptrtoint i8* [[ARRAYIDX11]] to i64 +// CHECK1-NEXT: [[TMP50:%.*]] = ptrtoint i8* [[ARRAYIDX8]] to i64 +// CHECK1-NEXT: [[TMP51:%.*]] = sub i64 [[TMP49]], [[TMP50]] +// CHECK1-NEXT: [[TMP52:%.*]] = sdiv exact i64 [[TMP51]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP53:%.*]] = add nuw i64 [[TMP52]], 1 +// CHECK1-NEXT: [[TMP54:%.*]] = mul nuw i64 [[TMP53]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP54]], i64* [[TMP55]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init..2 to i8*), i8** [[TMP56]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 +// CHECK1-NEXT: store i8* null, i8** [[TMP57]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb..3 to i8*), i8** [[TMP58]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 +// CHECK1-NEXT: store i32 1, i32* [[TMP59]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = load i32, i32* [[TMP60]], align 4 +// CHECK1-NEXT: [[TMP62:%.*]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]] to i8* +// CHECK1-NEXT: [[TMP63:%.*]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @[[GLOB2]], i32 [[TMP61]], i32 1, i32 2, i8* [[TMP62]]) +// CHECK1-NEXT: store i8* [[TMP63]], i8** [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP65:%.*]] = load i32, i32* [[TMP64]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP65]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP66:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP66]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP60:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP67:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP60]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP67]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP61]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP68:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP68]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP62:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 [[TMP62]], [[TMP63]] -// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP69:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP70:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP69]], [[TMP70]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP64:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP64]], 1 +// CHECK1-NEXT: [[TMP71:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP71]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL]] // CHECK1-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store i8** [[DOTTASK_RED_]], i8*** [[TMP65]], align 8 -// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[ARGC1]], i32** [[TMP66]], align 8 -// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP68:%.*]] = load i8**, i8*** [[_TMP5]], align 8 -// CHECK1-NEXT: store i8** [[TMP68]], i8*** [[TMP67]], align 8 -// CHECK1-NEXT: [[TMP69:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP70:%.*]] = load i32, i32* [[TMP69]], align 4 -// CHECK1-NEXT: [[TMP71:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB2]], i32 [[TMP70]], i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP72:%.*]] = bitcast i8* [[TMP71]] to %struct.kmp_task_t_with_privates* -// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP72]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP73]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP75:%.*]] = load i8*, i8** [[TMP74]], align 8 -// CHECK1-NEXT: [[TMP76:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP75]], i8* align 8 [[TMP76]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP72]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP77]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP79:%.*]] = load i8*, i8** [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: store i8* [[TMP79]], i8** [[TMP78]], align 8 -// CHECK1-NEXT: [[TMP80:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP81:%.*]] = load i32, i32* [[TMP80]], align 4 -// CHECK1-NEXT: [[TMP82:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB2]], i32 [[TMP81]], i8* [[TMP71]]) +// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store i8** [[DOTTASK_RED_]], i8*** [[TMP72]], align 8 +// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[ARGC]], i32** [[TMP73]], align 8 +// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP75:%.*]] = load i8**, i8*** [[_TMP4]], align 8 +// CHECK1-NEXT: store i8** [[TMP75]], i8*** [[TMP74]], align 8 +// CHECK1-NEXT: [[TMP76:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP77:%.*]] = load i32, i32* [[TMP76]], align 4 +// CHECK1-NEXT: [[TMP78:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB2]], i32 [[TMP77]], i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP79:%.*]] = bitcast i8* [[TMP78]] to %struct.kmp_task_t_with_privates* +// CHECK1-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP79]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP80]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP82:%.*]] = load i8*, i8** [[TMP81]], align 8 +// CHECK1-NEXT: [[TMP83:%.*]] = bitcast %struct.anon.1* [[AGG_CAPTURED]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP82]], i8* align 8 [[TMP83]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP84:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP79]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP85:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP84]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP86:%.*]] = load i8*, i8** [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: store i8* [[TMP86]], i8** [[TMP85]], align 8 +// CHECK1-NEXT: [[TMP87:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP88:%.*]] = load i32, i32* [[TMP87]], align 4 +// CHECK1-NEXT: [[TMP89:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB2]], i32 [[TMP88]], i8* [[TMP78]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP83:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP83]], 1 -// CHECK1-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP90:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP90]], 1 +// CHECK1-NEXT: store i64 [[ADD13]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP84:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP85:%.*]] = load i32, i32* [[TMP84]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP85]]) -// CHECK1-NEXT: [[TMP86:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP87:%.*]] = load i32, i32* [[TMP86]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP87]], i32 1) -// CHECK1-NEXT: [[TMP88:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP89:%.*]] = bitcast i32* [[ARGC1]] to i8* -// CHECK1-NEXT: store i8* [[TMP89]], i8** [[TMP88]], align 8 -// CHECK1-NEXT: [[TMP90:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP90]], align 8 -// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP92:%.*]] = inttoptr i64 [[TMP13]] to i8* -// CHECK1-NEXT: store i8* [[TMP92]], i8** [[TMP91]], align 8 +// CHECK1-NEXT: [[TMP91:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP92:%.*]] = load i32, i32* [[TMP91]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP92]]) // CHECK1-NEXT: [[TMP93:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP94:%.*]] = load i32, i32* [[TMP93]], align 4 -// CHECK1-NEXT: [[TMP95:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP96:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP94]], i32 2, i64 24, i8* [[TMP95]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP96]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP94]], i32 1) +// CHECK1-NEXT: [[TMP95:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP96:%.*]] = bitcast i32* [[ARGC]] to i8* +// CHECK1-NEXT: store i8* [[TMP96]], i8** [[TMP95]], align 8 +// CHECK1-NEXT: [[TMP97:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP97]], align 8 +// CHECK1-NEXT: [[TMP98:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP99:%.*]] = inttoptr i64 [[TMP20]] to i8* +// CHECK1-NEXT: store i8* [[TMP99]], i8** [[TMP98]], align 8 +// CHECK1-NEXT: [[TMP100:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP101:%.*]] = load i32, i32* [[TMP100]], align 4 +// CHECK1-NEXT: [[TMP102:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP103:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP101]], i32 2, i64 24, i8* [[TMP102]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP103]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP97:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP98:%.*]] = load i32, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP97]], [[TMP98]] -// CHECK1-NEXT: store i32 [[ADD15]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP99:%.*]] = getelementptr i8, i8* [[ARRAYIDX2]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[ARRAYIDX2]], [[TMP99]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE22:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP104:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP105:%.*]] = load i32, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP104]], [[TMP105]] +// CHECK1-NEXT: store i32 [[ADD14]], i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP106:%.*]] = getelementptr i8, i8* [[ARRAYIDX1]], i64 [[TMP20]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[ARRAYIDX1]], [[TMP106]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi i8* [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP100:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP100]] to i32 -// CHECK1-NEXT: [[TMP101:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP101]] to i32 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV]], [[CONV17]] -// CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK1-NEXT: store i8 [[CONV19]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi i8* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP107:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP107]] to i32 +// CHECK1-NEXT: [[TMP108:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV16:%.*]] = sext i8 [[TMP108]] to i32 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV]], [[CONV16]] +// CHECK1-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK1-NEXT: store i8 [[CONV18]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP99]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done22: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB4]], i32 [[TMP94]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP106]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done21: +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB4]], i32 [[TMP101]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP102:%.*]] = load i32, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP103:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP102]] monotonic, align 4 -// CHECK1-NEXT: [[TMP104:%.*]] = getelementptr i8, i8* [[ARRAYIDX2]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY23:%.*]] = icmp eq i8* [[ARRAYIDX2]], [[TMP104]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY23]], label [[OMP_ARRAYCPY_DONE36:%.*]], label [[OMP_ARRAYCPY_BODY24:%.*]] -// CHECK1: omp.arraycpy.body24: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST25:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT34:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST26:%.*]] = phi i8* [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT33:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP105:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV27:%.*]] = sext i8 [[TMP105]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST26]] monotonic, align 1 +// CHECK1-NEXT: [[TMP109:%.*]] = load i32, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP110:%.*]] = atomicrmw add i32* [[TMP6]], i32 [[TMP109]] monotonic, align 4 +// CHECK1-NEXT: [[TMP111:%.*]] = getelementptr i8, i8* [[ARRAYIDX1]], i64 [[TMP20]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY22:%.*]] = icmp eq i8* [[ARRAYIDX1]], [[TMP111]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY22]], label [[OMP_ARRAYCPY_DONE35:%.*]], label [[OMP_ARRAYCPY_BODY23:%.*]] +// CHECK1: omp.arraycpy.body23: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST24:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT33:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST25:%.*]] = phi i8* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], [[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[TMP112:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 +// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP112]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST25]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP106:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY24]] ], [ [[TMP111:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP106]], i8* [[_TMP28]], align 1 -// CHECK1-NEXT: [[TMP107:%.*]] = load i8, i8* [[_TMP28]], align 1 -// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP107]] to i32 -// CHECK1-NEXT: [[TMP108:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV30:%.*]] = sext i8 [[TMP108]] to i32 -// CHECK1-NEXT: [[ADD31:%.*]] = add nsw i32 [[CONV29]], [[CONV30]] -// CHECK1-NEXT: [[CONV32:%.*]] = trunc i32 [[ADD31]] to i8 -// CHECK1-NEXT: store i8 [[CONV32]], i8* [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP109:%.*]] = load i8, i8* [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP110:%.*]] = cmpxchg i8* [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i8 [[TMP106]], i8 [[TMP109]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP111]] = extractvalue { i8, i1 } [[TMP110]], 0 -// CHECK1-NEXT: [[TMP112:%.*]] = extractvalue { i8, i1 } [[TMP110]], 1 -// CHECK1-NEXT: br i1 [[TMP112]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP113:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY23]] ], [ [[TMP118:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP113]], i8* [[_TMP27]], align 1 +// CHECK1-NEXT: [[TMP114:%.*]] = load i8, i8* [[_TMP27]], align 1 +// CHECK1-NEXT: [[CONV28:%.*]] = sext i8 [[TMP114]] to i32 +// CHECK1-NEXT: [[TMP115:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 +// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP115]] to i32 +// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV28]], [[CONV29]] +// CHECK1-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 +// CHECK1-NEXT: store i8 [[CONV31]], i8* [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP116:%.*]] = load i8, i8* [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP117:%.*]] = cmpxchg i8* [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i8 [[TMP113]], i8 [[TMP116]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP118]] = extractvalue { i8, i1 } [[TMP117]], 0 +// CHECK1-NEXT: [[TMP119:%.*]] = extractvalue { i8, i1 } [[TMP117]], 1 +// CHECK1-NEXT: br i1 [[TMP119]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT33]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT34]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST25]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE35:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT33]], [[TMP104]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_DONE36]], label [[OMP_ARRAYCPY_BODY24]] -// CHECK1: omp.arraycpy.done36: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT33]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST24]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE34:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP111]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_BODY23]] +// CHECK1: omp.arraycpy.done35: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP113:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP113]]) +// CHECK1-NEXT: [[TMP120:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP120]]) // CHECK1-NEXT: ret void // // @@ -530,7 +548,7 @@ // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[TMP_I:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[TMP4_I:%.*]] = alloca i8*, align 8 @@ -544,7 +562,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.1* // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -557,29 +575,29 @@ // CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store %struct.anon.1* [[TMP8]], %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* // CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* // CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP24:%.*]] = load i8**, i8*** [[TMP23]], align 8 // CHECK1-NEXT: [[TMP25:%.*]] = load i8*, i8** [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP26]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 // CHECK1-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP29]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP31:%.*]] = load i8**, i8*** [[TMP30]], align 8 // CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds i8*, i8** [[TMP31]], i64 9 // CHECK1-NEXT: [[TMP32:%.*]] = load i8*, i8** [[ARRAYIDX2_I]], align 8 @@ -590,10 +608,10 @@ // CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 // CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8 +// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8, !noalias !12 // CHECK1-NEXT: [[TMP39:%.*]] = load i8*, i8** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP40:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP39]], i8* [[TMP25]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP42:%.*]] = load i8**, i8*** [[TMP41]], align 8 // CHECK1-NEXT: [[TMP43:%.*]] = load i8*, i8** [[TMP42]], align 8 // CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint i8* [[TMP43]] to i64 diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp @@ -801,24 +801,30 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: store double* [[A]], double** [[A_ADDR]], align 8 // CHECK1-NEXT: store double* [[B]], double** [[B_ADDR]], align 8 // CHECK1-NEXT: store double* [[C]], double** [[C_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store double** [[A_ADDR]], double*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store double** [[B_ADDR]], double*** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[C_ADDR]], double*** [[TMP3]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -828,83 +834,102 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double**, double*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !10 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP26]], align 8, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP27]], align 8, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[TMP2]], i32** [[TMP28]], align 8, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[TMP4]], double*** [[TMP29]], align 8, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store double** [[TMP6]], double*** [[TMP30]], align 8, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store double** [[TMP8]], double*** [[TMP31]], align 8, !llvm.access.group !10 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK1-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -917,16 +942,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -937,116 +957,120 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !14 -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK1-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM10]] +// CHECK1-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP10]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP31:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP32:%.*]] = load double*, double** [[TMP12]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP32]], i64 [[IDXPROM7]] +// CHECK1-NEXT: [[TMP34:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK1-NEXT: [[TMP35:%.*]] = load double*, double** [[TMP8]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP35]], i64 [[IDXPROM10]] // CHECK1-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 8, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[I4]], i32** [[TMP29]], align 8, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 8, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 8, !llvm.access.group !14 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !14 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store double** [[TMP8]], double*** [[TMP37]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[I4]], i32** [[TMP38]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store double** [[TMP10]], double*** [[TMP39]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[TMP12]], double*** [[TMP40]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !14 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK1-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP43]]) +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK1-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP46]], 0 // CHECK1-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK1-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -1065,24 +1089,30 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: store double* [[A]], double** [[A_ADDR]], align 8 // CHECK1-NEXT: store double* [[B]], double** [[B_ADDR]], align 8 // CHECK1-NEXT: store double* [[C]], double** [[C_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[CONV]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store double** [[A_ADDR]], double*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store double** [[B_ADDR]], double*** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[C_ADDR]], double*** [[TMP3]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1092,83 +1122,102 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double**, double*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !19 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP26]], align 8, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP27]], align 8, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[TMP2]], i32** [[TMP28]], align 8, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[TMP4]], double*** [[TMP29]], align 8, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store double** [[TMP6]], double*** [[TMP30]], align 8, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store double** [[TMP8]], double*** [[TMP31]], align 8, !llvm.access.group !19 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !19 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK1-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -1181,16 +1230,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1201,116 +1245,120 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !22 -// CHECK1-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !22 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !22 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !22 -// CHECK1-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !22 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !22 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !22 -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK1-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !22 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !22 -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM10]] +// CHECK1-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP10]], align 8, !llvm.access.group !22 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP31:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !22 +// CHECK1-NEXT: [[TMP32:%.*]] = load double*, double** [[TMP12]], align 8, !llvm.access.group !22 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP32]], i64 [[IDXPROM7]] +// CHECK1-NEXT: [[TMP34:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !22 +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK1-NEXT: [[TMP35:%.*]] = load double*, double** [[TMP8]], align 8, !llvm.access.group !22 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP35]], i64 [[IDXPROM10]] // CHECK1-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !22 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 8, !llvm.access.group !22 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[I4]], i32** [[TMP29]], align 8, !llvm.access.group !22 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 8, !llvm.access.group !22 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 8, !llvm.access.group !22 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE0_clEv"(%class.anon.1* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !22 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store double** [[TMP8]], double*** [[TMP37]], align 8, !llvm.access.group !22 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[I4]], i32** [[TMP38]], align 8, !llvm.access.group !22 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store double** [[TMP10]], double*** [[TMP39]], align 8, !llvm.access.group !22 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[TMP12]], double*** [[TMP40]], align 8, !llvm.access.group !22 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE0_clEv"(%class.anon.4* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !22 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK1-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP43]]) +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK1-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP46]], 0 // CHECK1-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK1-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -1330,6 +1378,7 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: store i64 [[CH]], i64* [[CH_ADDR]], align 8 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: store double* [[A]], double** [[A_ADDR]], align 8 @@ -1337,20 +1386,26 @@ // CHECK1-NEXT: store double* [[C]], double** [[C_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[CH_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[CONV]], i32* [[CONV1]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[CONV1]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store double** [[A_ADDR]], double*** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[B_ADDR]], double*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store double** [[C_ADDR]], double*** [[TMP4]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1360,110 +1415,129 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK1-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]]), !llvm.access.group !25 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK1-NEXT: store i64 [[TMP26]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK1-NEXT: store i64 [[TMP28]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP29]], align 8, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP30]], align 8, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[TMP4]], i32** [[TMP31]], align 8, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[TMP6]], double*** [[TMP32]], align 8, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store double** [[TMP8]], double*** [[TMP33]], align 8, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store double** [[TMP10]], double*** [[TMP34]], align 8, !llvm.access.group !25 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !25 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK1: cond.true10: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !25 // CHECK1-NEXT: br label [[COND_END12:%.*]] // CHECK1: cond.false11: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK1-NEXT: br label [[COND_END12]] // CHECK1: cond.end12: -// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE10]] ], [ [[TMP32]], [[COND_FALSE11]] ] +// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE10]] ], [ [[TMP44]], [[COND_FALSE11]] ] // CHECK1-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: store i32 [[TMP45]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 -// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]]) +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 +// CHECK1-NEXT: br i1 [[TMP49]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP50]], 0 // CHECK1-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1 // CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]] @@ -1476,16 +1550,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1496,116 +1565,120 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_2:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_7:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !28 -// CHECK1-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !28 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !28 -// CHECK1-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !28 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !28 -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK1-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !28 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM10]] +// CHECK1-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP10]], align 8, !llvm.access.group !28 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP31:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !28 +// CHECK1-NEXT: [[TMP32:%.*]] = load double*, double** [[TMP12]], align 8, !llvm.access.group !28 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP32]], i64 [[IDXPROM7]] +// CHECK1-NEXT: [[TMP34:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !28 +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK1-NEXT: [[TMP35:%.*]] = load double*, double** [[TMP8]], align 8, !llvm.access.group !28 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 +// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP35]], i64 [[IDXPROM10]] // CHECK1-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !28 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 8, !llvm.access.group !28 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[I4]], i32** [[TMP29]], align 8, !llvm.access.group !28 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 8, !llvm.access.group !28 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 8, !llvm.access.group !28 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE1_clEv"(%class.anon.2* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !28 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], %class.anon.7* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store double** [[TMP8]], double*** [[TMP37]], align 8, !llvm.access.group !28 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], %class.anon.7* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[I4]], i32** [[TMP38]], align 8, !llvm.access.group !28 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], %class.anon.7* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store double** [[TMP10]], double*** [[TMP39]], align 8, !llvm.access.group !28 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], %class.anon.7* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[TMP12]], double*** [[TMP40]], align 8, !llvm.access.group !28 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE1_clEv"(%class.anon.7* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !28 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK1-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP43]]) +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK1-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP46]], 0 // CHECK1-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK1-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -1624,24 +1697,30 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: store double* [[A]], double** [[A_ADDR]], align 8 // CHECK1-NEXT: store double* [[B]], double** [[B_ADDR]], align 8 // CHECK1-NEXT: store double* [[C]], double** [[C_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* [[CONV]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store double** [[A_ADDR]], double*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store double** [[B_ADDR]], double*** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[C_ADDR]], double*** [[TMP3]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1651,83 +1730,102 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double**, double*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !31 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP26]], align 8, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP27]], align 8, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[TMP2]], i32** [[TMP28]], align 8, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[TMP4]], double*** [[TMP29]], align 8, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store double** [[TMP6]], double*** [[TMP30]], align 8, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store double** [[TMP8]], double*** [[TMP31]], align 8, !llvm.access.group !31 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !31 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK1-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -1740,16 +1838,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1760,116 +1853,120 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_3:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_10:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !34 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !34 -// CHECK1-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !34 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !34 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !34 -// CHECK1-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !34 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !34 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !34 -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK1-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !34 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !34 -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM10]] +// CHECK1-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP10]], align 8, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP31:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP32:%.*]] = load double*, double** [[TMP12]], align 8, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP32]], i64 [[IDXPROM7]] +// CHECK1-NEXT: [[TMP34:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !34 +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK1-NEXT: [[TMP35:%.*]] = load double*, double** [[TMP8]], align 8, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP35]], i64 [[IDXPROM10]] // CHECK1-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !34 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 8, !llvm.access.group !34 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[I4]], i32** [[TMP29]], align 8, !llvm.access.group !34 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 8, !llvm.access.group !34 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 8, !llvm.access.group !34 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE2_clEv"(%class.anon.3* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !34 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], %class.anon.10* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store double** [[TMP8]], double*** [[TMP37]], align 8, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], %class.anon.10* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[I4]], i32** [[TMP38]], align 8, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], %class.anon.10* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store double** [[TMP10]], double*** [[TMP39]], align 8, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], %class.anon.10* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[TMP12]], double*** [[TMP40]], align 8, !llvm.access.group !34 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE2_clEv"(%class.anon.10* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !34 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK1-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP43]]) +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK1-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP46]], 0 // CHECK1-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK1-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -1889,6 +1986,7 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 // CHECK1-NEXT: store i64 [[CH]], i64* [[CH_ADDR]], align 8 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: store double* [[A]], double** [[A_ADDR]], align 8 @@ -1896,20 +1994,26 @@ // CHECK1-NEXT: store double* [[C]], double** [[C_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[CH_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* [[CONV]], i32* [[CONV1]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[CONV1]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store double** [[A_ADDR]], double*** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[B_ADDR]], double*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store double** [[C_ADDR]], double*** [[TMP4]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.11*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -1920,92 +2024,109 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK1-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], %struct.anon.11* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !37 -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37 -// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !37 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4, !llvm.access.group !37 -// CHECK1-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !37 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i64 [[TMP24]]), !llvm.access.group !37 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !37 +// CHECK1-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK1-NEXT: store i64 [[TMP26]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !37 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37 +// CHECK1-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK1-NEXT: store i64 [[TMP28]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !37 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP29]], align 8, !llvm.access.group !37 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP30]], align 8, !llvm.access.group !37 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[TMP4]], i32** [[TMP31]], align 8, !llvm.access.group !37 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[TMP6]], double*** [[TMP32]], align 8, !llvm.access.group !37 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store double** [[TMP8]], double*** [[TMP33]], align 8, !llvm.access.group !37 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store double** [[TMP10]], double*** [[TMP34]], align 8, !llvm.access.group !37 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !37 +// CHECK1-NEXT: store i32 [[TMP36]], i32* [[TMP35]], align 8, !llvm.access.group !37 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.12*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !37 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !37 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !37 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK1-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP40]]) +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK1-NEXT: br i1 [[TMP42]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP43]], 0 // CHECK1-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -2018,17 +2139,12 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2038,143 +2154,148 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_4:%.*]], align 8 +// CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_13:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 8 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32 -// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP23]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP25]] to i32 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP24]], [[CONV6]] +// CHECK1-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP26]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !40 -// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !40 +// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !40 -// CHECK1-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !40 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !40 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !40 -// CHECK1-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !40 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !40 -// CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM12]] -// CHECK1-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX13]], align 8, !llvm.access.group !40 -// CHECK1-NEXT: [[ADD14:%.*]] = fadd double [[TMP25]], [[TMP28]] -// CHECK1-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !40 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !40 -// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM15]] -// CHECK1-NEXT: store double [[ADD14]], double* [[ARRAYIDX16]], align 8, !llvm.access.group !40 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP31]], align 8, !llvm.access.group !40 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[I6]], i32** [[TMP32]], align 8, !llvm.access.group !40 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store double** [[TMP2]], double*** [[TMP33]], align 8, !llvm.access.group !40 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store double** [[TMP3]], double*** [[TMP34]], align 8, !llvm.access.group !40 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE3_clEv"(%class.anon.4* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !40 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !40 +// CHECK1-NEXT: [[TMP34:%.*]] = load double*, double** [[TMP10]], align 8, !llvm.access.group !40 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !40 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP34]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP36:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !40 +// CHECK1-NEXT: [[TMP37:%.*]] = load double*, double** [[TMP12]], align 8, !llvm.access.group !40 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !40 +// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP38]] to i64 +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP37]], i64 [[IDXPROM11]] +// CHECK1-NEXT: [[TMP39:%.*]] = load double, double* [[ARRAYIDX12]], align 8, !llvm.access.group !40 +// CHECK1-NEXT: [[ADD13:%.*]] = fadd double [[TMP36]], [[TMP39]] +// CHECK1-NEXT: [[TMP40:%.*]] = load double*, double** [[TMP8]], align 8, !llvm.access.group !40 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !40 +// CHECK1-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP41]] to i64 +// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[TMP40]], i64 [[IDXPROM14]] +// CHECK1-NEXT: store double [[ADD13]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !40 +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], %class.anon.13* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store double** [[TMP8]], double*** [[TMP42]], align 8, !llvm.access.group !40 +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], %class.anon.13* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[I5]], i32** [[TMP43]], align 8, !llvm.access.group !40 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], %class.anon.13* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store double** [[TMP10]], double*** [[TMP44]], align 8, !llvm.access.group !40 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], %class.anon.13* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[TMP12]], double*** [[TMP45]], align 8, !llvm.access.group !40 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE3_clEv"(%class.anon.13* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !40 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK1-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP46]], 1 +// CHECK1-NEXT: store i32 [[ADD16]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] -// CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] -// CHECK1-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP47]], [[TMP48]] +// CHECK1-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP49]], [[TMP50]] +// CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP41]]) -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 -// CHECK1-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP51:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[TMP51]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP52]]) +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP54:%.*]] = icmp ne i32 [[TMP53]], 0 +// CHECK1-NEXT: br i1 [[TMP54]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP44]], 0 -// CHECK1-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 -// CHECK1-NEXT: [[MUL22:%.*]] = mul nsw i32 [[DIV21]], 1 -// CHECK1-NEXT: [[ADD23:%.*]] = add nsw i32 0, [[MUL22]] -// CHECK1-NEXT: store i32 [[ADD23]], i32* [[I6]], align 4 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP55]], 0 +// CHECK1-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 +// CHECK1-NEXT: [[MUL21:%.*]] = mul nsw i32 [[DIV20]], 1 +// CHECK1-NEXT: [[ADD22:%.*]] = add nsw i32 0, [[MUL21]] +// CHECK1-NEXT: store i32 [[ADD22]], i32* [[I5]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -2189,24 +2310,30 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: store double* [[A]], double** [[A_ADDR]], align 8 // CHECK1-NEXT: store double* [[B]], double** [[B_ADDR]], align 8 // CHECK1-NEXT: store double* [[C]], double** [[C_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32* [[CONV]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store double** [[A_ADDR]], double*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store double** [[B_ADDR]], double*** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[C_ADDR]], double*** [[TMP3]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2216,83 +2343,102 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double**, double*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !43 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !43 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !43 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP26]], align 8, !llvm.access.group !43 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP27]], align 8, !llvm.access.group !43 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[TMP2]], i32** [[TMP28]], align 8, !llvm.access.group !43 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[TMP4]], double*** [[TMP29]], align 8, !llvm.access.group !43 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store double** [[TMP6]], double*** [[TMP30]], align 8, !llvm.access.group !43 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store double** [[TMP8]], double*** [[TMP31]], align 8, !llvm.access.group !43 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.15*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !43 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK1-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -2305,16 +2451,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.15* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.15*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2325,99 +2466,103 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_5:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_16:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store %struct.anon.15* [[__CONTEXT]], %struct.anon.15** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.15*, %struct.anon.15** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], %struct.anon.15* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP24]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !46 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !46 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !46 -// CHECK1-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !46 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !46 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP21]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !46 -// CHECK1-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !46 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !46 -// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP25]] to i64 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[TMP24]], i64 [[IDXPROM6]] -// CHECK1-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX7]], align 8, !llvm.access.group !46 -// CHECK1-NEXT: [[ADD8:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK1-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !46 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !46 -// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP27]], i64 [[IDXPROM9]] +// CHECK1-NEXT: [[TMP30:%.*]] = load double*, double** [[TMP10]], align 8, !llvm.access.group !46 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !46 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP30]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP32:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !46 +// CHECK1-NEXT: [[TMP33:%.*]] = load double*, double** [[TMP12]], align 8, !llvm.access.group !46 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !46 +// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[TMP33]], i64 [[IDXPROM6]] +// CHECK1-NEXT: [[TMP35:%.*]] = load double, double* [[ARRAYIDX7]], align 8, !llvm.access.group !46 +// CHECK1-NEXT: [[ADD8:%.*]] = fadd double [[TMP32]], [[TMP35]] +// CHECK1-NEXT: [[TMP36:%.*]] = load double*, double** [[TMP8]], align 8, !llvm.access.group !46 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !46 +// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP36]], i64 [[IDXPROM9]] // CHECK1-NEXT: store double [[ADD8]], double* [[ARRAYIDX10]], align 8, !llvm.access.group !46 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP29]], align 8, !llvm.access.group !46 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[I4]], i32** [[TMP30]], align 8, !llvm.access.group !46 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store double** [[TMP2]], double*** [[TMP31]], align 8, !llvm.access.group !46 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store double** [[TMP3]], double*** [[TMP32]], align 8, !llvm.access.group !46 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE4_clEv"(%class.anon.5* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !46 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], %class.anon.16* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store double** [[TMP8]], double*** [[TMP38]], align 8, !llvm.access.group !46 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], %class.anon.16* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[I4]], i32** [[TMP39]], align 8, !llvm.access.group !46 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], %class.anon.16* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store double** [[TMP10]], double*** [[TMP40]], align 8, !llvm.access.group !46 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], %class.anon.16* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[TMP12]], double*** [[TMP41]], align 8, !llvm.access.group !46 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE4_clEv"(%class.anon.16* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !46 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP42]], 1 // CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -2425,12 +2570,12 @@ // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK1-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0 +// CHECK1-NEXT: br i1 [[TMP44]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP45]], 0 // CHECK1-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK1-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 // CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] @@ -2450,6 +2595,7 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 8 // CHECK1-NEXT: store i64 [[CH]], i64* [[CH_ADDR]], align 8 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: store double* [[A]], double** [[A_ADDR]], align 8 @@ -2457,20 +2603,26 @@ // CHECK1-NEXT: store double* [[C]], double** [[C_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[CH_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32* [[CONV]], i32* [[CONV1]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[CONV1]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store double** [[A_ADDR]], double*** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[B_ADDR]], double*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store double** [[C_ADDR]], double*** [[TMP4]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.17*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.17* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.17*, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -2481,92 +2633,109 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK1-NEXT: store %struct.anon.17* [[__CONTEXT]], %struct.anon.17** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.17*, %struct.anon.17** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], %struct.anon.17* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !49 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !49 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !49 -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !49 -// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !49 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4, !llvm.access.group !49 -// CHECK1-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !49 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**, i64)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i64 [[TMP24]]), !llvm.access.group !49 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !49 +// CHECK1-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK1-NEXT: store i64 [[TMP26]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !49 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !49 +// CHECK1-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK1-NEXT: store i64 [[TMP28]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !49 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP29]], align 8, !llvm.access.group !49 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP30]], align 8, !llvm.access.group !49 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[TMP4]], i32** [[TMP31]], align 8, !llvm.access.group !49 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[TMP6]], double*** [[TMP32]], align 8, !llvm.access.group !49 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store double** [[TMP8]], double*** [[TMP33]], align 8, !llvm.access.group !49 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store double** [[TMP10]], double*** [[TMP34]], align 8, !llvm.access.group !49 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !49 +// CHECK1-NEXT: store i32 [[TMP36]], i32* [[TMP35]], align 8, !llvm.access.group !49 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.18*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !49 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !49 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !49 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK1-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP40]]) +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK1-NEXT: br i1 [[TMP42]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP43]], 0 // CHECK1-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -2579,17 +2748,12 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.18* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.18*, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2599,120 +2763,125 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_6:%.*]], align 8 +// CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_20:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store %struct.anon.18* [[__CONTEXT]], %struct.anon.18** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.18*, %struct.anon.18** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], %struct.anon.18* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 8 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32 -// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP16]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !52 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !52 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP30]], [[TMP31]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !52 -// CHECK1-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !52 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !52 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP22]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !52 -// CHECK1-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !52 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !52 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP26]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds double, double* [[TMP25]], i64 [[IDXPROM8]] -// CHECK1-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX9]], align 8, !llvm.access.group !52 -// CHECK1-NEXT: [[ADD10:%.*]] = fadd double [[TMP24]], [[TMP27]] -// CHECK1-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !52 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !52 -// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP28]], i64 [[IDXPROM11]] -// CHECK1-NEXT: store double [[ADD10]], double* [[ARRAYIDX12]], align 8, !llvm.access.group !52 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP30]], align 8, !llvm.access.group !52 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[I6]], i32** [[TMP31]], align 8, !llvm.access.group !52 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store double** [[TMP2]], double*** [[TMP32]], align 8, !llvm.access.group !52 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store double** [[TMP3]], double*** [[TMP33]], align 8, !llvm.access.group !52 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE5_clEv"(%class.anon.6* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !52 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !52 +// CHECK1-NEXT: [[TMP33:%.*]] = load double*, double** [[TMP10]], align 8, !llvm.access.group !52 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !52 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP33]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP35:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !52 +// CHECK1-NEXT: [[TMP36:%.*]] = load double*, double** [[TMP12]], align 8, !llvm.access.group !52 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !52 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP36]], i64 [[IDXPROM7]] +// CHECK1-NEXT: [[TMP38:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !52 +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP35]], [[TMP38]] +// CHECK1-NEXT: [[TMP39:%.*]] = load double*, double** [[TMP8]], align 8, !llvm.access.group !52 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !52 +// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP40]] to i64 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP39]], i64 [[IDXPROM10]] +// CHECK1-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !52 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[CLASS_ANON_20]], %class.anon.20* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store double** [[TMP8]], double*** [[TMP41]], align 8, !llvm.access.group !52 +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[CLASS_ANON_20]], %class.anon.20* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[I5]], i32** [[TMP42]], align 8, !llvm.access.group !52 +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[CLASS_ANON_20]], %class.anon.20* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store double** [[TMP10]], double*** [[TMP43]], align 8, !llvm.access.group !52 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[CLASS_ANON_20]], %class.anon.20* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store double** [[TMP12]], double*** [[TMP44]], align 8, !llvm.access.group !52 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE5_clEv"(%class.anon.20* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !52 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP34]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP45]], 1 +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK1-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 +// CHECK1-NEXT: br i1 [[TMP47]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP37]], 0 -// CHECK1-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 -// CHECK1-NEXT: [[MUL16:%.*]] = mul nsw i32 [[DIV15]], 1 -// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL16]] -// CHECK1-NEXT: store i32 [[ADD17]], i32* [[I6]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP48]], 0 +// CHECK1-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK1-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] +// CHECK1-NEXT: store i32 [[ADD16]], i32* [[I5]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -2761,23 +2930,29 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store double* [[A]], double** [[A_ADDR]], align 4 // CHECK3-NEXT: store double* [[B]], double** [[B_ADDR]], align 4 // CHECK3-NEXT: store double* [[C]], double** [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[N_ADDR]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[N_ADDR]], i32** [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store double** [[A_ADDR]], double*** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store double** [[B_ADDR]], double*** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[C_ADDR]], double*** [[TMP3]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2787,81 +2962,100 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load double**, double*** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !11 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP24]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP25]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[TMP4]], double*** [[TMP27]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store double** [[TMP6]], double*** [[TMP28]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store double** [[TMP8]], double*** [[TMP29]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK3-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -2874,16 +3068,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2894,111 +3083,115 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] +// CHECK3-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP10]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP29]], i32 [[TMP30]] +// CHECK3-NEXT: [[TMP31:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP32:%.*]] = load double*, double** [[TMP12]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP32]], i32 [[TMP33]] +// CHECK3-NEXT: [[TMP34:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load double*, double** [[TMP8]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP35]], i32 [[TMP36]] // CHECK3-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store i32* [[I3]], i32** [[TMP29]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !15 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double** [[TMP8]], double*** [[TMP37]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[I3]], i32** [[TMP38]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store double** [[TMP10]], double*** [[TMP39]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[TMP12]], double*** [[TMP40]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !15 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK3-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP43]]) +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK3-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK3-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP46]], 0 // CHECK3-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -3017,23 +3210,29 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store double* [[A]], double** [[A_ADDR]], align 4 // CHECK3-NEXT: store double* [[B]], double** [[B_ADDR]], align 4 // CHECK3-NEXT: store double* [[C]], double** [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[N_ADDR]], i32** [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store double** [[A_ADDR]], double*** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store double** [[B_ADDR]], double*** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[C_ADDR]], double*** [[TMP3]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3043,81 +3242,100 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load double**, double*** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !20 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP24]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP25]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[TMP4]], double*** [[TMP27]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store double** [[TMP6]], double*** [[TMP28]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store double** [[TMP8]], double*** [[TMP29]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !20 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK3-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -3130,16 +3348,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3150,111 +3363,115 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_4:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] +// CHECK3-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP10]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP29]], i32 [[TMP30]] +// CHECK3-NEXT: [[TMP31:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[TMP32:%.*]] = load double*, double** [[TMP12]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP32]], i32 [[TMP33]] +// CHECK3-NEXT: [[TMP34:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load double*, double** [[TMP8]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP35]], i32 [[TMP36]] // CHECK3-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store i32* [[I3]], i32** [[TMP29]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE0_clEv"(%class.anon.1* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !23 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double** [[TMP8]], double*** [[TMP37]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[I3]], i32** [[TMP38]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store double** [[TMP10]], double*** [[TMP39]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[TMP12]], double*** [[TMP40]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE0_clEv"(%class.anon.4* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !23 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK3-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP43]]) +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK3-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK3-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP46]], 0 // CHECK3-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -3274,25 +3491,32 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store i32 [[CH]], i32* [[CH_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store double* [[A]], double** [[A_ADDR]], align 4 // CHECK3-NEXT: store double* [[B]], double** [[B_ADDR]], align 4 // CHECK3-NEXT: store double* [[C]], double** [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[CH_ADDR]], i32* [[N_ADDR]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[CH_ADDR]], i32** [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[N_ADDR]], i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store double** [[A_ADDR]], double*** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[B_ADDR]], double*** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store double** [[C_ADDR]], double*** [[TMP4]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3302,108 +3526,127 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK3-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !26 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]]), !llvm.access.group !26 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: store i32 [[TMP25]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: store i32 [[TMP26]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP27]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP28]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[TMP4]], i32** [[TMP29]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[TMP6]], double*** [[TMP30]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store double** [[TMP8]], double*** [[TMP31]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store double** [[TMP10]], double*** [[TMP32]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !26 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !26 -// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] // CHECK3-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK3: cond.true10: -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !26 // CHECK3-NEXT: br label [[COND_END12:%.*]] // CHECK3: cond.false11: -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK3-NEXT: br label [[COND_END12]] // CHECK3: cond.end12: -// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE10]] ], [ [[TMP30]], [[COND_FALSE11]] ] +// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE10]] ], [ [[TMP42]], [[COND_FALSE11]] ] // CHECK3-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 -// CHECK3-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: store i32 [[TMP43]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP45]]) +// CHECK3-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 +// CHECK3-NEXT: br i1 [[TMP47]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK3-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP48]], 0 // CHECK3-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1 // CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]] @@ -3416,16 +3659,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3436,111 +3674,115 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_2:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_7:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !29 -// CHECK3-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !29 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !29 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !29 -// CHECK3-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !29 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !29 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !29 -// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !29 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !29 -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] +// CHECK3-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP10]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP29]], i32 [[TMP30]] +// CHECK3-NEXT: [[TMP31:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[TMP32:%.*]] = load double*, double** [[TMP12]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP32]], i32 [[TMP33]] +// CHECK3-NEXT: [[TMP34:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load double*, double** [[TMP8]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP35]], i32 [[TMP36]] // CHECK3-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !29 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 4, !llvm.access.group !29 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store i32* [[I3]], i32** [[TMP29]], align 4, !llvm.access.group !29 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 4, !llvm.access.group !29 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 4, !llvm.access.group !29 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE1_clEv"(%class.anon.2* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !29 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], %class.anon.7* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double** [[TMP8]], double*** [[TMP37]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], %class.anon.7* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[I3]], i32** [[TMP38]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], %class.anon.7* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store double** [[TMP10]], double*** [[TMP39]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], %class.anon.7* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[TMP12]], double*** [[TMP40]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE1_clEv"(%class.anon.7* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !29 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK3-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP43]]) +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK3-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK3-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP46]], 0 // CHECK3-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -3559,23 +3801,29 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store double* [[A]], double** [[A_ADDR]], align 4 // CHECK3-NEXT: store double* [[B]], double** [[B_ADDR]], align 4 // CHECK3-NEXT: store double* [[C]], double** [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[N_ADDR]], i32** [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store double** [[A_ADDR]], double*** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store double** [[B_ADDR]], double*** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[C_ADDR]], double*** [[TMP3]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3585,81 +3833,100 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load double**, double*** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !32 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !32 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !32 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !32 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 +// CHECK3-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !32 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP24]], align 4, !llvm.access.group !32 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP25]], align 4, !llvm.access.group !32 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 4, !llvm.access.group !32 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[TMP4]], double*** [[TMP27]], align 4, !llvm.access.group !32 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store double** [[TMP6]], double*** [[TMP28]], align 4, !llvm.access.group !32 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store double** [[TMP8]], double*** [[TMP29]], align 4, !llvm.access.group !32 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !32 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !32 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !32 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK3-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -3672,16 +3939,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3692,111 +3954,115 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_3:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_10:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !35 -// CHECK3-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !35 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !35 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !35 -// CHECK3-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !35 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !35 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !35 -// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !35 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !35 -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] +// CHECK3-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP10]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP29]], i32 [[TMP30]] +// CHECK3-NEXT: [[TMP31:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP32:%.*]] = load double*, double** [[TMP12]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP32]], i32 [[TMP33]] +// CHECK3-NEXT: [[TMP34:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load double*, double** [[TMP8]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP35]], i32 [[TMP36]] // CHECK3-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !35 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 4, !llvm.access.group !35 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store i32* [[I3]], i32** [[TMP29]], align 4, !llvm.access.group !35 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 4, !llvm.access.group !35 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 4, !llvm.access.group !35 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE2_clEv"(%class.anon.3* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !35 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], %class.anon.10* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double** [[TMP8]], double*** [[TMP37]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], %class.anon.10* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[I3]], i32** [[TMP38]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], %class.anon.10* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store double** [[TMP10]], double*** [[TMP39]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], %class.anon.10* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[TMP12]], double*** [[TMP40]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE2_clEv"(%class.anon.10* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !35 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK3-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP43]]) +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK3-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK3-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP46]], 0 // CHECK3-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -3816,25 +4082,32 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK3-NEXT: store i32 [[CH]], i32* [[CH_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store double* [[A]], double** [[A_ADDR]], align 4 // CHECK3-NEXT: store double* [[B]], double** [[B_ADDR]], align 4 // CHECK3-NEXT: store double* [[C]], double** [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* [[CH_ADDR]], i32* [[N_ADDR]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[CH_ADDR]], i32** [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[N_ADDR]], i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store double** [[A_ADDR]], double*** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[B_ADDR]], double*** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store double** [[C_ADDR]], double*** [[TMP4]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.11*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -3845,89 +4118,107 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK3-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], %struct.anon.11* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !38 -// CHECK3-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !38 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !38 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i32 [[TMP22]]), !llvm.access.group !38 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: store i32 [[TMP25]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: store i32 [[TMP26]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP27]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP28]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[TMP4]], i32** [[TMP29]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[TMP6]], double*** [[TMP30]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store double** [[TMP8]], double*** [[TMP31]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store double** [[TMP10]], double*** [[TMP32]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: store i32 [[TMP34]], i32* [[TMP33]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.12*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !38 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK3-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK3-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP29]], 0 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP41]], 0 // CHECK3-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -3940,17 +4231,12 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -3961,130 +4247,136 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_4:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_13:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP23]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 +// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] // CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !41 -// CHECK3-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !41 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !41 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !41 -// CHECK3-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !41 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !41 -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] -// CHECK3-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX8]], align 4, !llvm.access.group !41 -// CHECK3-NEXT: [[ADD9:%.*]] = fadd double [[TMP25]], [[TMP28]] -// CHECK3-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !41 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !41 -// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP29]], i32 [[TMP30]] +// CHECK3-NEXT: [[TMP34:%.*]] = load double*, double** [[TMP10]], align 4, !llvm.access.group !41 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !41 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP34]], i32 [[TMP35]] +// CHECK3-NEXT: [[TMP36:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !41 +// CHECK3-NEXT: [[TMP37:%.*]] = load double*, double** [[TMP12]], align 4, !llvm.access.group !41 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !41 +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP37]], i32 [[TMP38]] +// CHECK3-NEXT: [[TMP39:%.*]] = load double, double* [[ARRAYIDX8]], align 4, !llvm.access.group !41 +// CHECK3-NEXT: [[ADD9:%.*]] = fadd double [[TMP36]], [[TMP39]] +// CHECK3-NEXT: [[TMP40:%.*]] = load double*, double** [[TMP8]], align 4, !llvm.access.group !41 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !41 +// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP40]], i32 [[TMP41]] // CHECK3-NEXT: store double [[ADD9]], double* [[ARRAYIDX10]], align 4, !llvm.access.group !41 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double** [[TMP1]], double*** [[TMP31]], align 4, !llvm.access.group !41 -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store i32* [[I4]], i32** [[TMP32]], align 4, !llvm.access.group !41 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store double** [[TMP2]], double*** [[TMP33]], align 4, !llvm.access.group !41 -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store double** [[TMP3]], double*** [[TMP34]], align 4, !llvm.access.group !41 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE3_clEv"(%class.anon.4* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !41 +// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], %class.anon.13* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double** [[TMP8]], double*** [[TMP42]], align 4, !llvm.access.group !41 +// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], %class.anon.13* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[I4]], i32** [[TMP43]], align 4, !llvm.access.group !41 +// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], %class.anon.13* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store double** [[TMP10]], double*** [[TMP44]], align 4, !llvm.access.group !41 +// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], %class.anon.13* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[TMP12]], double*** [[TMP45]], align 4, !llvm.access.group !41 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE3_clEv"(%class.anon.13* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !41 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK3-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP46]], 1 // CHECK3-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK3-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP47]], [[TMP48]] // CHECK3-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] +// CHECK3-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP49]], [[TMP50]] // CHECK3-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP41]]) -// CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 -// CHECK3-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP51:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP52:%.*]] = load i32, i32* [[TMP51]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP52]]) +// CHECK3-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP54:%.*]] = icmp ne i32 [[TMP53]], 0 +// CHECK3-NEXT: br i1 [[TMP54]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK3-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP55]], 0 // CHECK3-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 // CHECK3-NEXT: [[MUL16:%.*]] = mul nsw i32 [[DIV15]], 1 // CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL16]] @@ -4103,23 +4395,29 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store double* [[A]], double** [[A_ADDR]], align 4 // CHECK3-NEXT: store double* [[B]], double** [[B_ADDR]], align 4 // CHECK3-NEXT: store double* [[C]], double** [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[N_ADDR]], i32** [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store double** [[A_ADDR]], double*** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store double** [[B_ADDR]], double*** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[C_ADDR]], double*** [[TMP3]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4129,81 +4427,100 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load double**, double*** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !44 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !44 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !44 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !44 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 +// CHECK3-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !44 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP24]], align 4, !llvm.access.group !44 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP25]], align 4, !llvm.access.group !44 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 4, !llvm.access.group !44 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[TMP4]], double*** [[TMP27]], align 4, !llvm.access.group !44 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store double** [[TMP6]], double*** [[TMP28]], align 4, !llvm.access.group !44 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store double** [[TMP8]], double*** [[TMP29]], align 4, !llvm.access.group !44 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.15*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !44 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !44 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !44 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK3-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -4216,16 +4533,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.15* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.15*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4236,94 +4548,98 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_5:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_16:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store %struct.anon.15* [[__CONTEXT]], %struct.anon.15** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.15*, %struct.anon.15** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], %struct.anon.15* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP24]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !47 -// CHECK3-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !47 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !47 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP21]], i32 [[TMP22]] -// CHECK3-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !47 -// CHECK3-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !47 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !47 -// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX5]], align 4, !llvm.access.group !47 -// CHECK3-NEXT: [[ADD6:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK3-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !47 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !47 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[TMP27]], i32 [[TMP28]] +// CHECK3-NEXT: [[TMP30:%.*]] = load double*, double** [[TMP10]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP30]], i32 [[TMP31]] +// CHECK3-NEXT: [[TMP32:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP33:%.*]] = load double*, double** [[TMP12]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[TMP33]], i32 [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load double, double* [[ARRAYIDX5]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[ADD6:%.*]] = fadd double [[TMP32]], [[TMP35]] +// CHECK3-NEXT: [[TMP36:%.*]] = load double*, double** [[TMP8]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[TMP36]], i32 [[TMP37]] // CHECK3-NEXT: store double [[ADD6]], double* [[ARRAYIDX7]], align 4, !llvm.access.group !47 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double** [[TMP1]], double*** [[TMP29]], align 4, !llvm.access.group !47 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store i32* [[I3]], i32** [[TMP30]], align 4, !llvm.access.group !47 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store double** [[TMP2]], double*** [[TMP31]], align 4, !llvm.access.group !47 -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store double** [[TMP3]], double*** [[TMP32]], align 4, !llvm.access.group !47 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE4_clEv"(%class.anon.5* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !47 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], %class.anon.16* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double** [[TMP8]], double*** [[TMP38]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], %class.anon.16* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[I3]], i32** [[TMP39]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], %class.anon.16* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store double** [[TMP10]], double*** [[TMP40]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], %class.anon.16* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[TMP12]], double*** [[TMP41]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE4_clEv"(%class.anon.16* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !47 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP42]], 1 // CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK3: omp.inner.for.end: @@ -4331,12 +4647,12 @@ // CHECK3: omp.dispatch.inc: // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0 +// CHECK3-NEXT: br i1 [[TMP44]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK3-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP45]], 0 // CHECK3-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 // CHECK3-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 // CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] @@ -4356,25 +4672,32 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK3-NEXT: store i32 [[CH]], i32* [[CH_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store double* [[A]], double** [[A_ADDR]], align 4 // CHECK3-NEXT: store double* [[B]], double** [[B_ADDR]], align 4 // CHECK3-NEXT: store double* [[C]], double** [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32* [[CH_ADDR]], i32* [[N_ADDR]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[CH_ADDR]], i32** [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[N_ADDR]], i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store double** [[A_ADDR]], double*** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[B_ADDR]], double*** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store double** [[C_ADDR]], double*** [[TMP4]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.17*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.17* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.17*, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -4385,89 +4708,107 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK3-NEXT: store %struct.anon.17* [[__CONTEXT]], %struct.anon.17** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.17*, %struct.anon.17** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], %struct.anon.17* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !50 -// CHECK3-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !50 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !50 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**, i32)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i32 [[TMP22]]), !llvm.access.group !50 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: store i32 [[TMP25]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: store i32 [[TMP26]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP27]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP28]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[TMP4]], i32** [[TMP29]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[TMP6]], double*** [[TMP30]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store double** [[TMP8]], double*** [[TMP31]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store double** [[TMP10]], double*** [[TMP32]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: store i32 [[TMP34]], i32* [[TMP33]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.18*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !50 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK3-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK3-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP29]], 0 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP41]], 0 // CHECK3-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -4480,17 +4821,12 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.18* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.18*, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -4501,96 +4837,102 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_6:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_19:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store %struct.anon.18* [[__CONTEXT]], %struct.anon.18** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.18*, %struct.anon.18** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], %struct.anon.18* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP16]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP29]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !53 -// CHECK3-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !53 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !53 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP22]], i32 [[TMP23]] -// CHECK3-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !53 -// CHECK3-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !53 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !53 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP25]], i32 [[TMP26]] -// CHECK3-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !53 -// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP24]], [[TMP27]] -// CHECK3-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !53 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !53 -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP28]], i32 [[TMP29]] +// CHECK3-NEXT: [[TMP33:%.*]] = load double*, double** [[TMP10]], align 4, !llvm.access.group !53 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !53 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP33]], i32 [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !53 +// CHECK3-NEXT: [[TMP36:%.*]] = load double*, double** [[TMP12]], align 4, !llvm.access.group !53 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !53 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP36]], i32 [[TMP37]] +// CHECK3-NEXT: [[TMP38:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !53 +// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP35]], [[TMP38]] +// CHECK3-NEXT: [[TMP39:%.*]] = load double*, double** [[TMP8]], align 4, !llvm.access.group !53 +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !53 +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP39]], i32 [[TMP40]] // CHECK3-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !53 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double** [[TMP1]], double*** [[TMP30]], align 4, !llvm.access.group !53 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store i32* [[I4]], i32** [[TMP31]], align 4, !llvm.access.group !53 -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store double** [[TMP2]], double*** [[TMP32]], align 4, !llvm.access.group !53 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store double** [[TMP3]], double*** [[TMP33]], align 4, !llvm.access.group !53 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE5_clEv"(%class.anon.6* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !53 +// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[CLASS_ANON_19]], %class.anon.19* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double** [[TMP8]], double*** [[TMP41]], align 4, !llvm.access.group !53 +// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[CLASS_ANON_19]], %class.anon.19* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[I4]], i32** [[TMP42]], align 4, !llvm.access.group !53 +// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[CLASS_ANON_19]], %class.anon.19* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store double** [[TMP10]], double*** [[TMP43]], align 4, !llvm.access.group !53 +// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[CLASS_ANON_19]], %class.anon.19* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store double** [[TMP12]], double*** [[TMP44]], align 4, !llvm.access.group !53 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE5_clEv"(%class.anon.19* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !53 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP34]], 1 +// CHECK3-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP45]], 1 // CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK3: omp.inner.for.end: @@ -4598,12 +4940,12 @@ // CHECK3: omp.dispatch.inc: // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK3-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 +// CHECK3-NEXT: br i1 [[TMP47]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK3-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP48]], 0 // CHECK3-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -5200,24 +5542,30 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double*, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double*, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store double* [[A]], double** [[A_ADDR]], align 8 // CHECK9-NEXT: store double* [[B]], double** [[B_ADDR]], align 8 // CHECK9-NEXT: store double* [[C]], double** [[C_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store double** [[A_ADDR]], double*** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store double** [[B_ADDR]], double*** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store double** [[C_ADDR]], double*** [[TMP3]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5227,83 +5575,102 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load double**, double*** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !17 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP26]], align 8, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP27]], align 8, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP2]], i32** [[TMP28]], align 8, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store double** [[TMP4]], double*** [[TMP29]], align 8, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store double** [[TMP6]], double*** [[TMP30]], align 8, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store double** [[TMP8]], double*** [[TMP31]], align 8, !llvm.access.group !17 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !17 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -5316,16 +5683,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5338,104 +5700,108 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !21 -// CHECK9-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !21 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !21 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !21 -// CHECK9-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !21 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !21 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !21 -// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !21 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !21 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP10]], align 8, !llvm.access.group !21 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !21 +// CHECK9-NEXT: [[TMP32:%.*]] = load double*, double** [[TMP12]], align 8, !llvm.access.group !21 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !21 +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load double*, double** [[TMP8]], align 8, !llvm.access.group !21 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !21 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK9-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -5454,24 +5820,30 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double*, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double*, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store double* [[A]], double** [[A_ADDR]], align 8 // CHECK9-NEXT: store double* [[B]], double** [[B_ADDR]], align 8 // CHECK9-NEXT: store double* [[C]], double** [[C_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[CONV]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store double** [[A_ADDR]], double*** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store double** [[B_ADDR]], double*** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store double** [[C_ADDR]], double*** [[TMP3]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5481,83 +5853,102 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load double**, double*** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !26 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !26 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !26 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP26]], align 8, !llvm.access.group !26 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP27]], align 8, !llvm.access.group !26 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP2]], i32** [[TMP28]], align 8, !llvm.access.group !26 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store double** [[TMP4]], double*** [[TMP29]], align 8, !llvm.access.group !26 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store double** [[TMP6]], double*** [[TMP30]], align 8, !llvm.access.group !26 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store double** [[TMP8]], double*** [[TMP31]], align 8, !llvm.access.group !26 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !26 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -5570,16 +5961,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5592,104 +5978,108 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !29 -// CHECK9-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !29 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !29 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !29 -// CHECK9-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !29 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !29 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !29 -// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !29 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !29 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP10]], align 8, !llvm.access.group !29 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !29 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !29 +// CHECK9-NEXT: [[TMP32:%.*]] = load double*, double** [[TMP12]], align 8, !llvm.access.group !29 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !29 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !29 +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load double*, double** [[TMP8]], align 8, !llvm.access.group !29 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !29 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !29 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK9-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -5709,6 +6099,7 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double*, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double*, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], i64* [[CH_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store double* [[A]], double** [[A_ADDR]], align 8 @@ -5716,20 +6107,26 @@ // CHECK9-NEXT: store double* [[C]], double** [[C_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[CH_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[CONV]], i32* [[CONV1]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store double** [[A_ADDR]], double*** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store double** [[B_ADDR]], double*** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store double** [[C_ADDR]], double*** [[TMP4]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5739,110 +6136,129 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK9-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !32 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !32 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !32 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]]), !llvm.access.group !32 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !32 +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !32 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !32 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP29]], align 8, !llvm.access.group !32 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP30]], align 8, !llvm.access.group !32 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP4]], i32** [[TMP31]], align 8, !llvm.access.group !32 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store double** [[TMP6]], double*** [[TMP32]], align 8, !llvm.access.group !32 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store double** [[TMP8]], double*** [[TMP33]], align 8, !llvm.access.group !32 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store double** [[TMP10]], double*** [[TMP34]], align 8, !llvm.access.group !32 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !32 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !32 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !32 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !32 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !32 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !32 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !32 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !32 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !32 -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !32 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !32 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !32 +// CHECK9-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK9-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK9: cond.true10: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !32 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !32 // CHECK9-NEXT: br label [[COND_END12:%.*]] // CHECK9: cond.false11: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 // CHECK9-NEXT: br label [[COND_END12]] // CHECK9: cond.end12: -// CHECK9-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE10]] ], [ [[TMP32]], [[COND_FALSE11]] ] +// CHECK9-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE10]] ], [ [[TMP44]], [[COND_FALSE11]] ] // CHECK9-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !32 -// CHECK9-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !32 +// CHECK9-NEXT: store i32 [[TMP45]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) -// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 -// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]]) +// CHECK9-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 +// CHECK9-NEXT: br i1 [[TMP49]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK9-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP50]], 0 // CHECK9-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1 // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]] @@ -5855,16 +6271,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5877,104 +6288,108 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !35 -// CHECK9-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !35 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !35 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !35 -// CHECK9-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !35 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !35 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !35 -// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !35 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !35 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP10]], align 8, !llvm.access.group !35 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !35 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !35 +// CHECK9-NEXT: [[TMP32:%.*]] = load double*, double** [[TMP12]], align 8, !llvm.access.group !35 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !35 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !35 +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load double*, double** [[TMP8]], align 8, !llvm.access.group !35 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !35 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !35 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK9-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -5993,24 +6408,30 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double*, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double*, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store double* [[A]], double** [[A_ADDR]], align 8 // CHECK9-NEXT: store double* [[B]], double** [[B_ADDR]], align 8 // CHECK9-NEXT: store double* [[C]], double** [[C_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* [[CONV]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store double** [[A_ADDR]], double*** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store double** [[B_ADDR]], double*** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store double** [[C_ADDR]], double*** [[TMP3]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -6020,83 +6441,102 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load double**, double*** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !38 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !38 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !38 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP26]], align 8, !llvm.access.group !38 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP27]], align 8, !llvm.access.group !38 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP2]], i32** [[TMP28]], align 8, !llvm.access.group !38 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store double** [[TMP4]], double*** [[TMP29]], align 8, !llvm.access.group !38 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store double** [[TMP6]], double*** [[TMP30]], align 8, !llvm.access.group !38 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store double** [[TMP8]], double*** [[TMP31]], align 8, !llvm.access.group !38 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !38 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -6109,16 +6549,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -6131,104 +6566,108 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !41 -// CHECK9-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !41 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !41 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !41 -// CHECK9-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !41 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !41 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !41 -// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !41 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !41 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP10]], align 8, !llvm.access.group !41 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !41 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !41 +// CHECK9-NEXT: [[TMP32:%.*]] = load double*, double** [[TMP12]], align 8, !llvm.access.group !41 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !41 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !41 +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load double*, double** [[TMP8]], align 8, !llvm.access.group !41 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !41 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !41 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK9-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -6248,6 +6687,7 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double*, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double*, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], i64* [[CH_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store double* [[A]], double** [[A_ADDR]], align 8 @@ -6255,20 +6695,26 @@ // CHECK9-NEXT: store double* [[C]], double** [[C_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[CH_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* [[CONV]], i32* [[CONV1]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store double** [[A_ADDR]], double*** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store double** [[B_ADDR]], double*** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store double** [[C_ADDR]], double*** [[TMP4]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -6279,92 +6725,109 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !44 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !44 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4, !llvm.access.group !44 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !44 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i64 [[TMP24]]), !llvm.access.group !44 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !44 +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !44 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !44 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP29]], align 8, !llvm.access.group !44 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP30]], align 8, !llvm.access.group !44 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP4]], i32** [[TMP31]], align 8, !llvm.access.group !44 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store double** [[TMP6]], double*** [[TMP32]], align 8, !llvm.access.group !44 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store double** [[TMP8]], double*** [[TMP33]], align 8, !llvm.access.group !44 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store double** [[TMP10]], double*** [[TMP34]], align 8, !llvm.access.group !44 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !44 +// CHECK9-NEXT: store i32 [[TMP36]], i32* [[TMP35]], align 8, !llvm.access.group !44 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !44 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !44 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !44 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK9-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP40]]) +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK9-NEXT: br i1 [[TMP42]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP43]], 0 // CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -6377,17 +6840,12 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -6397,133 +6855,138 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 8 +// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32 -// CHECK9-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 +// CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP23]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP25]] to i32 +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP24]], [[CONV6]] +// CHECK9-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP26]] to i32 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 -// CHECK9-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK9-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 +// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] +// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !47 -// CHECK9-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !47 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !47 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !47 -// CHECK9-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !47 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !47 -// CHECK9-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM12]] -// CHECK9-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX13]], align 8, !llvm.access.group !47 -// CHECK9-NEXT: [[ADD14:%.*]] = fadd double [[TMP25]], [[TMP28]] -// CHECK9-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !47 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !47 -// CHECK9-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK9-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM15]] -// CHECK9-NEXT: store double [[ADD14]], double* [[ARRAYIDX16]], align 8, !llvm.access.group !47 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !47 +// CHECK9-NEXT: [[TMP34:%.*]] = load double*, double** [[TMP10]], align 8, !llvm.access.group !47 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !47 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP34]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP36:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !47 +// CHECK9-NEXT: [[TMP37:%.*]] = load double*, double** [[TMP12]], align 8, !llvm.access.group !47 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !47 +// CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP38]] to i64 +// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP37]], i64 [[IDXPROM11]] +// CHECK9-NEXT: [[TMP39:%.*]] = load double, double* [[ARRAYIDX12]], align 8, !llvm.access.group !47 +// CHECK9-NEXT: [[ADD13:%.*]] = fadd double [[TMP36]], [[TMP39]] +// CHECK9-NEXT: [[TMP40:%.*]] = load double*, double** [[TMP8]], align 8, !llvm.access.group !47 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !47 +// CHECK9-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP41]] to i64 +// CHECK9-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[TMP40]], i64 [[IDXPROM14]] +// CHECK9-NEXT: store double [[ADD13]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !47 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK9-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP42]], 1 +// CHECK9-NEXT: store i32 [[ADD16]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] -// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK9-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK9-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] +// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP36]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP37]]) -// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 -// CHECK9-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP47:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP48:%.*]] = load i32, i32* [[TMP47]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP48]]) +// CHECK9-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CHECK9-NEXT: br i1 [[TMP50]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP40]], 0 -// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 -// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i32 [[DIV21]], 1 -// CHECK9-NEXT: [[ADD23:%.*]] = add nsw i32 0, [[MUL22]] -// CHECK9-NEXT: store i32 [[ADD23]], i32* [[I6]], align 4 +// CHECK9-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP51]], 0 +// CHECK9-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 +// CHECK9-NEXT: [[MUL21:%.*]] = mul nsw i32 [[DIV20]], 1 +// CHECK9-NEXT: [[ADD22:%.*]] = add nsw i32 0, [[MUL21]] +// CHECK9-NEXT: store i32 [[ADD22]], i32* [[I5]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -6538,24 +7001,30 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double*, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double*, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store double* [[A]], double** [[A_ADDR]], align 8 // CHECK9-NEXT: store double* [[B]], double** [[B_ADDR]], align 8 // CHECK9-NEXT: store double* [[C]], double** [[C_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32* [[CONV]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store double** [[A_ADDR]], double*** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store double** [[B_ADDR]], double*** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store double** [[C_ADDR]], double*** [[TMP3]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -6565,83 +7034,102 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load double**, double*** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !50 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !50 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !50 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP26]], align 8, !llvm.access.group !50 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP27]], align 8, !llvm.access.group !50 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP2]], i32** [[TMP28]], align 8, !llvm.access.group !50 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store double** [[TMP4]], double*** [[TMP29]], align 8, !llvm.access.group !50 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store double** [[TMP6]], double*** [[TMP30]], align 8, !llvm.access.group !50 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store double** [[TMP8]], double*** [[TMP31]], align 8, !llvm.access.group !50 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !50 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -6654,16 +7142,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -6676,87 +7159,91 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP24]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !53 -// CHECK9-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !53 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !53 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP21]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !53 -// CHECK9-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !53 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !53 -// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP25]] to i64 -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[TMP24]], i64 [[IDXPROM6]] -// CHECK9-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX7]], align 8, !llvm.access.group !53 -// CHECK9-NEXT: [[ADD8:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK9-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !53 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !53 -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP27]], i64 [[IDXPROM9]] +// CHECK9-NEXT: [[TMP30:%.*]] = load double*, double** [[TMP10]], align 8, !llvm.access.group !53 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !53 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP30]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP32:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !53 +// CHECK9-NEXT: [[TMP33:%.*]] = load double*, double** [[TMP12]], align 8, !llvm.access.group !53 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !53 +// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[TMP33]], i64 [[IDXPROM6]] +// CHECK9-NEXT: [[TMP35:%.*]] = load double, double* [[ARRAYIDX7]], align 8, !llvm.access.group !53 +// CHECK9-NEXT: [[ADD8:%.*]] = fadd double [[TMP32]], [[TMP35]] +// CHECK9-NEXT: [[TMP36:%.*]] = load double*, double** [[TMP8]], align 8, !llvm.access.group !53 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !53 +// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP36]], i64 [[IDXPROM9]] // CHECK9-NEXT: store double [[ADD8]], double* [[ARRAYIDX10]], align 8, !llvm.access.group !53 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP38]], 1 // CHECK9-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK9: omp.inner.for.end: @@ -6764,12 +7251,12 @@ // CHECK9: omp.dispatch.inc: // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK9-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK9-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP41]], 0 // CHECK9-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK9-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 // CHECK9-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] @@ -6789,6 +7276,7 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double*, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double*, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], i64* [[CH_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store double* [[A]], double** [[A_ADDR]], align 8 @@ -6796,20 +7284,26 @@ // CHECK9-NEXT: store double* [[C]], double** [[C_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[CH_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32* [[CONV]], i32* [[CONV1]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store double** [[A_ADDR]], double*** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store double** [[B_ADDR]], double*** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store double** [[C_ADDR]], double*** [[TMP4]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.11*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -6820,92 +7314,109 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], %struct.anon.11* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !56 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !56 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4, !llvm.access.group !56 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !56 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**, i64)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i64 [[TMP24]]), !llvm.access.group !56 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !56 +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !56 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !56 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP29]], align 8, !llvm.access.group !56 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP30]], align 8, !llvm.access.group !56 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP4]], i32** [[TMP31]], align 8, !llvm.access.group !56 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store double** [[TMP6]], double*** [[TMP32]], align 8, !llvm.access.group !56 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store double** [[TMP8]], double*** [[TMP33]], align 8, !llvm.access.group !56 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store double** [[TMP10]], double*** [[TMP34]], align 8, !llvm.access.group !56 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !56 +// CHECK9-NEXT: store i32 [[TMP36]], i32* [[TMP35]], align 8, !llvm.access.group !56 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.12*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !56 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !56 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !56 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK9-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP40]]) +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK9-NEXT: br i1 [[TMP42]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP43]], 0 // CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -6918,17 +7429,12 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], double** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca double**, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -6938,110 +7444,115 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store double** [[A]], double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store double** [[B]], double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store double** [[C]], double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 8 +// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32 -// CHECK9-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 +// CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP16]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !59 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !59 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP30]], [[TMP31]] +// CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !59 -// CHECK9-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !59 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !59 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP22]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !59 -// CHECK9-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !59 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !59 -// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP26]] to i64 -// CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds double, double* [[TMP25]], i64 [[IDXPROM8]] -// CHECK9-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX9]], align 8, !llvm.access.group !59 -// CHECK9-NEXT: [[ADD10:%.*]] = fadd double [[TMP24]], [[TMP27]] -// CHECK9-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !59 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !59 -// CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP28]], i64 [[IDXPROM11]] -// CHECK9-NEXT: store double [[ADD10]], double* [[ARRAYIDX12]], align 8, !llvm.access.group !59 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !59 +// CHECK9-NEXT: [[TMP33:%.*]] = load double*, double** [[TMP10]], align 8, !llvm.access.group !59 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !59 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP33]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP35:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !59 +// CHECK9-NEXT: [[TMP36:%.*]] = load double*, double** [[TMP12]], align 8, !llvm.access.group !59 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !59 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP36]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP38:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !59 +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP35]], [[TMP38]] +// CHECK9-NEXT: [[TMP39:%.*]] = load double*, double** [[TMP8]], align 8, !llvm.access.group !59 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !59 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP40]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP39]], i64 [[IDXPROM10]] +// CHECK9-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !59 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 -// CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP30]], 1 -// CHECK9-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 +// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 +// CHECK9-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP33]], 0 -// CHECK9-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 -// CHECK9-NEXT: [[MUL16:%.*]] = mul nsw i32 [[DIV15]], 1 -// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL16]] -// CHECK9-NEXT: store i32 [[ADD17]], i32* [[I6]], align 4 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 +// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] +// CHECK9-NEXT: store i32 [[ADD16]], i32* [[I5]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -7570,24 +8081,30 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK9-NEXT: store i32* [[B]], i32** [[B_ADDR]], align 8 // CHECK9-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32**, i32**, i32**)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32* [[CONV]], i32** [[A_ADDR]], i32** [[B_ADDR]], i32** [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32** [[A_ADDR]], i32*** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32** [[B_ADDR]], i32*** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32** [[C_ADDR]], i32*** [[TMP3]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7597,83 +8114,102 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32**, i32*** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !62 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !62 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !62 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !62 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]), !llvm.access.group !62 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !62 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !62 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !62 +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !62 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP26]], align 8, !llvm.access.group !62 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP27]], align 8, !llvm.access.group !62 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP2]], i32** [[TMP28]], align 8, !llvm.access.group !62 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32** [[TMP4]], i32*** [[TMP29]], align 8, !llvm.access.group !62 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32** [[TMP6]], i32*** [[TMP30]], align 8, !llvm.access.group !62 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store i32** [[TMP8]], i32*** [[TMP31]], align 8, !llvm.access.group !62 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.15*)* @.omp_outlined..27 to void (i32*, i32*, ...)*), %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !62 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !62 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !62 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -7686,16 +8222,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.15* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.15*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7708,104 +8239,108 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.15* [[__CONTEXT]], %struct.anon.15** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.15*, %struct.anon.15** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], %struct.anon.15* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32**, i32*** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !65 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !65 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !65 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 8, !llvm.access.group !65 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !65 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !65 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !65 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !65 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4, !llvm.access.group !65 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !65 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !65 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP10]], align 8, !llvm.access.group !65 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !65 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !65 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32*, i32** [[TMP12]], align 8, !llvm.access.group !65 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !65 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4, !llvm.access.group !65 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32*, i32** [[TMP8]], align 8, !llvm.access.group !65 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !65 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX11]], align 4, !llvm.access.group !65 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP66:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK9-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -7824,24 +8359,30 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK9-NEXT: store i32* [[B]], i32** [[B_ADDR]], align 8 // CHECK9-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32**, i32**, i32**)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* [[CONV]], i32** [[A_ADDR]], i32** [[B_ADDR]], i32** [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32** [[A_ADDR]], i32*** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32** [[B_ADDR]], i32*** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32** [[C_ADDR]], i32*** [[TMP3]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.16*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.16* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.16*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7851,83 +8392,102 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.16* [[__CONTEXT]], %struct.anon.16** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], %struct.anon.16* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32**, i32*** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !68 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !68 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !68 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !68 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]), !llvm.access.group !68 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !68 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !68 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !68 +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !68 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP26]], align 8, !llvm.access.group !68 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP27]], align 8, !llvm.access.group !68 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP2]], i32** [[TMP28]], align 8, !llvm.access.group !68 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32** [[TMP4]], i32*** [[TMP29]], align 8, !llvm.access.group !68 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32** [[TMP6]], i32*** [[TMP30]], align 8, !llvm.access.group !68 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store i32** [[TMP8]], i32*** [[TMP31]], align 8, !llvm.access.group !68 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.17*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !68 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !68 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !68 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP69:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -7940,16 +8500,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.17* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.17*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7962,104 +8517,108 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.17* [[__CONTEXT]], %struct.anon.17** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.17*, %struct.anon.17** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], %struct.anon.17* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32**, i32*** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !71 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !71 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !71 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 8, !llvm.access.group !71 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !71 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !71 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !71 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !71 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4, !llvm.access.group !71 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !71 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !71 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP10]], align 8, !llvm.access.group !71 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !71 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !71 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32*, i32** [[TMP12]], align 8, !llvm.access.group !71 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !71 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4, !llvm.access.group !71 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32*, i32** [[TMP8]], align 8, !llvm.access.group !71 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !71 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX11]], align 4, !llvm.access.group !71 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP72:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK9-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -8079,6 +8638,7 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], i64* [[CH_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 @@ -8086,20 +8646,26 @@ // CHECK9-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[CH_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32**, i32**, i32**)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* [[CONV]], i32* [[CONV1]], i32** [[A_ADDR]], i32** [[B_ADDR]], i32** [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32** [[A_ADDR]], i32*** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32** [[B_ADDR]], i32*** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32** [[C_ADDR]], i32*** [[TMP4]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.18*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.18* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.18*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8109,110 +8675,129 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_19:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK9-NEXT: store %struct.anon.18* [[__CONTEXT]], %struct.anon.18** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.18*, %struct.anon.18** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], %struct.anon.18* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32**, i32*** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !74 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !74 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !74 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !74 -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]]), !llvm.access.group !74 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !74 +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !74 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !74 +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !74 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP29]], align 8, !llvm.access.group !74 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP30]], align 8, !llvm.access.group !74 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP4]], i32** [[TMP31]], align 8, !llvm.access.group !74 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32** [[TMP6]], i32*** [[TMP32]], align 8, !llvm.access.group !74 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32** [[TMP8]], i32*** [[TMP33]], align 8, !llvm.access.group !74 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store i32** [[TMP10]], i32*** [[TMP34]], align 8, !llvm.access.group !74 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.19*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), %struct.anon.19* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !74 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !74 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !74 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !74 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !74 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !74 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !74 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !74 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !74 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !74 -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !74 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !74 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !74 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !74 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !74 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !74 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !74 +// CHECK9-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK9-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK9: cond.true10: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !74 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !74 // CHECK9-NEXT: br label [[COND_END12:%.*]] // CHECK9: cond.false11: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !74 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !74 // CHECK9-NEXT: br label [[COND_END12]] // CHECK9: cond.end12: -// CHECK9-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE10]] ], [ [[TMP32]], [[COND_FALSE11]] ] +// CHECK9-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE10]] ], [ [[TMP44]], [[COND_FALSE11]] ] // CHECK9-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !74 -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !74 -// CHECK9-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !74 +// CHECK9-NEXT: store i32 [[TMP45]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP75:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) -// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 -// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]]) +// CHECK9-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 +// CHECK9-NEXT: br i1 [[TMP49]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK9-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP50]], 0 // CHECK9-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1 // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]] @@ -8225,16 +8810,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.19* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.19*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8247,104 +8827,108 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.19* [[__CONTEXT]], %struct.anon.19** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.19*, %struct.anon.19** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_19:%.*]], %struct.anon.19* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32**, i32*** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !77 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !77 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !77 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !77 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !77 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !77 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !77 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 8, !llvm.access.group !77 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !77 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !77 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !77 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !77 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4, !llvm.access.group !77 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !77 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !77 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP10]], align 8, !llvm.access.group !77 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !77 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !77 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32*, i32** [[TMP12]], align 8, !llvm.access.group !77 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !77 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4, !llvm.access.group !77 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32*, i32** [[TMP8]], align 8, !llvm.access.group !77 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !77 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX11]], align 4, !llvm.access.group !77 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !77 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !77 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !77 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP78:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK9-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -8363,24 +8947,30 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK9-NEXT: store i32* [[B]], i32** [[B_ADDR]], align 8 // CHECK9-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32**, i32**, i32**)* @.omp_outlined..38 to void (i32*, i32*, ...)*), i32* [[CONV]], i32** [[A_ADDR]], i32** [[B_ADDR]], i32** [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32** [[A_ADDR]], i32*** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32** [[B_ADDR]], i32*** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32** [[C_ADDR]], i32*** [[TMP3]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.20*)* @.omp_outlined..38 to void (i32*, i32*, ...)*), %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..38 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.20* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.20*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8390,83 +8980,102 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.20* [[__CONTEXT]], %struct.anon.20** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.20*, %struct.anon.20** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], %struct.anon.20* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32**, i32*** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !80 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !80 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !80 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !80 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !80 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !80 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]), !llvm.access.group !80 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !80 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !80 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !80 +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !80 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP26]], align 8, !llvm.access.group !80 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP27]], align 8, !llvm.access.group !80 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP2]], i32** [[TMP28]], align 8, !llvm.access.group !80 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32** [[TMP4]], i32*** [[TMP29]], align 8, !llvm.access.group !80 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32** [[TMP6]], i32*** [[TMP30]], align 8, !llvm.access.group !80 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store i32** [[TMP8]], i32*** [[TMP31]], align 8, !llvm.access.group !80 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.21*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !80 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !80 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !80 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !80 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !80 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !80 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP81:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -8479,16 +9088,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..39 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.21* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.21*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8501,104 +9105,108 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.21* [[__CONTEXT]], %struct.anon.21** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.21*, %struct.anon.21** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], %struct.anon.21* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32**, i32*** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !83 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !83 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !83 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !83 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !83 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !83 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !83 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 8, !llvm.access.group !83 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !83 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !83 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !83 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !83 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4, !llvm.access.group !83 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !83 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !83 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP10]], align 8, !llvm.access.group !83 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !83 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !83 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32*, i32** [[TMP12]], align 8, !llvm.access.group !83 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !83 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4, !llvm.access.group !83 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32*, i32** [[TMP8]], align 8, !llvm.access.group !83 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !83 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX11]], align 4, !llvm.access.group !83 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !83 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !83 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !83 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP84:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK9-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -8618,6 +9226,7 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], i64* [[CH_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 @@ -8625,20 +9234,26 @@ // CHECK9-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[CH_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32**, i32**, i32**)* @.omp_outlined..42 to void (i32*, i32*, ...)*), i32* [[CONV]], i32* [[CONV1]], i32** [[A_ADDR]], i32** [[B_ADDR]], i32** [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32** [[A_ADDR]], i32*** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32** [[B_ADDR]], i32*** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32** [[C_ADDR]], i32*** [[TMP4]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.22*)* @.omp_outlined..42 to void (i32*, i32*, ...)*), %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..42 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.22* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.22*, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -8649,92 +9264,109 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_23:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store %struct.anon.22* [[__CONTEXT]], %struct.anon.22** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.22*, %struct.anon.22** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], %struct.anon.22* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32**, i32*** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !86 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !86 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !86 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !86 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !86 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !86 -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !86 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4, !llvm.access.group !86 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !86 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**, i64)* @.omp_outlined..43 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]], i64 [[TMP24]]), !llvm.access.group !86 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !86 +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !86 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !86 +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !86 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP29]], align 8, !llvm.access.group !86 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP30]], align 8, !llvm.access.group !86 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP4]], i32** [[TMP31]], align 8, !llvm.access.group !86 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32** [[TMP6]], i32*** [[TMP32]], align 8, !llvm.access.group !86 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32** [[TMP8]], i32*** [[TMP33]], align 8, !llvm.access.group !86 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store i32** [[TMP10]], i32*** [[TMP34]], align 8, !llvm.access.group !86 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !86 +// CHECK9-NEXT: store i32 [[TMP36]], i32* [[TMP35]], align 8, !llvm.access.group !86 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.23*)* @.omp_outlined..43 to void (i32*, i32*, ...)*), %struct.anon.23* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !86 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !86 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !86 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !86 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !86 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !86 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP87:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK9-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP40]]) +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK9-NEXT: br i1 [[TMP42]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP43]], 0 // CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -8747,17 +9379,12 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..43 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.23* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.23*, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -8767,133 +9394,138 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.23* [[__CONTEXT]], %struct.anon.23** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.23*, %struct.anon.23** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_23:%.*]], %struct.anon.23* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32**, i32*** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 8 +// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32 -// CHECK9-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 +// CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP23]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP25]] to i32 +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP24]], [[CONV6]] +// CHECK9-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP26]] to i32 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !89 -// CHECK9-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK9-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !89 +// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] +// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !89 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP2]], align 8, !llvm.access.group !89 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !89 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !89 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !89 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !89 -// CHECK9-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM12]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX13]], align 4, !llvm.access.group !89 -// CHECK9-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !89 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !89 -// CHECK9-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK9-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[IDXPROM15]] -// CHECK9-NEXT: store i32 [[ADD14]], i32* [[ARRAYIDX16]], align 4, !llvm.access.group !89 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !89 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32*, i32** [[TMP10]], align 8, !llvm.access.group !89 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !89 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP34]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !89 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32*, i32** [[TMP12]], align 8, !llvm.access.group !89 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !89 +// CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP38]] to i64 +// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[TMP37]], i64 [[IDXPROM11]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[ARRAYIDX12]], align 4, !llvm.access.group !89 +// CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP36]], [[TMP39]] +// CHECK9-NEXT: [[TMP40:%.*]] = load i32*, i32** [[TMP8]], align 8, !llvm.access.group !89 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !89 +// CHECK9-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP41]] to i64 +// CHECK9-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[TMP40]], i64 [[IDXPROM14]] +// CHECK9-NEXT: store i32 [[ADD13]], i32* [[ARRAYIDX15]], align 4, !llvm.access.group !89 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 -// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK9-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 +// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP42]], 1 +// CHECK9-NEXT: store i32 [[ADD16]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP90:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] -// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK9-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK9-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] +// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP36]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP37]]) -// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 -// CHECK9-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP47:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP48:%.*]] = load i32, i32* [[TMP47]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP48]]) +// CHECK9-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CHECK9-NEXT: br i1 [[TMP50]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP40]], 0 -// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 -// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i32 [[DIV21]], 1 -// CHECK9-NEXT: [[ADD23:%.*]] = add nsw i32 0, [[MUL22]] -// CHECK9-NEXT: store i32 [[ADD23]], i32* [[I6]], align 4 +// CHECK9-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP51]], 0 +// CHECK9-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 +// CHECK9-NEXT: [[MUL21:%.*]] = mul nsw i32 [[DIV20]], 1 +// CHECK9-NEXT: [[ADD22:%.*]] = add nsw i32 0, [[MUL21]] +// CHECK9-NEXT: store i32 [[ADD22]], i32* [[I5]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -8908,24 +9540,30 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_25:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK9-NEXT: store i32* [[B]], i32** [[B_ADDR]], align 8 // CHECK9-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32**, i32**, i32**)* @.omp_outlined..46 to void (i32*, i32*, ...)*), i32* [[CONV]], i32** [[A_ADDR]], i32** [[B_ADDR]], i32** [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32** [[A_ADDR]], i32*** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32** [[B_ADDR]], i32*** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32** [[C_ADDR]], i32*** [[TMP3]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.25*)* @.omp_outlined..46 to void (i32*, i32*, ...)*), %struct.anon.25* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..46 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.25* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.25*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8935,83 +9573,102 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_26:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.25* [[__CONTEXT]], %struct.anon.25** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.25*, %struct.anon.25** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_25:%.*]], %struct.anon.25* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32**, i32*** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !92 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !92 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !92 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !92 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !92 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !92 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**)* @.omp_outlined..47 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]), !llvm.access.group !92 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !92 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !92 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !92 +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !92 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP26]], align 8, !llvm.access.group !92 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP27]], align 8, !llvm.access.group !92 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP2]], i32** [[TMP28]], align 8, !llvm.access.group !92 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32** [[TMP4]], i32*** [[TMP29]], align 8, !llvm.access.group !92 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32** [[TMP6]], i32*** [[TMP30]], align 8, !llvm.access.group !92 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store i32** [[TMP8]], i32*** [[TMP31]], align 8, !llvm.access.group !92 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.26*)* @.omp_outlined..47 to void (i32*, i32*, ...)*), %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !92 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !92 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !92 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !92 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !92 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !92 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP93:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -9024,16 +9681,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..47 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.26* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.26*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9046,87 +9698,91 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.26* [[__CONTEXT]], %struct.anon.26** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.26*, %struct.anon.26** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_26:%.*]], %struct.anon.26* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32**, i32*** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP24]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !95 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !95 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !95 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !95 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !95 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !95 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !95 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32*, i32** [[TMP2]], align 8, !llvm.access.group !95 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !95 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP21]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !95 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !95 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !95 -// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP25]] to i64 -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[TMP24]], i64 [[IDXPROM6]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4, !llvm.access.group !95 -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP26]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !95 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !95 -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[TMP27]], i64 [[IDXPROM9]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32*, i32** [[TMP10]], align 8, !llvm.access.group !95 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !95 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP30]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !95 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32*, i32** [[TMP12]], align 8, !llvm.access.group !95 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !95 +// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[TMP33]], i64 [[IDXPROM6]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4, !llvm.access.group !95 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP32]], [[TMP35]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i32*, i32** [[TMP8]], align 8, !llvm.access.group !95 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !95 +// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[TMP36]], i64 [[IDXPROM9]] // CHECK9-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX10]], align 4, !llvm.access.group !95 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !95 -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !95 +// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP38]], 1 // CHECK9-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !95 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP96:![0-9]+]] // CHECK9: omp.inner.for.end: @@ -9134,12 +9790,12 @@ // CHECK9: omp.dispatch.inc: // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK9-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK9-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP41]], 0 // CHECK9-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK9-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 // CHECK9-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] @@ -9159,6 +9815,7 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_27:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], i64* [[CH_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 @@ -9166,20 +9823,26 @@ // CHECK9-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[CH_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32**, i32**, i32**)* @.omp_outlined..50 to void (i32*, i32*, ...)*), i32* [[CONV]], i32* [[CONV1]], i32** [[A_ADDR]], i32** [[B_ADDR]], i32** [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], %struct.anon.27* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], %struct.anon.27* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], %struct.anon.27* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32** [[A_ADDR]], i32*** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], %struct.anon.27* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32** [[B_ADDR]], i32*** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], %struct.anon.27* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32** [[C_ADDR]], i32*** [[TMP4]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.27*)* @.omp_outlined..50 to void (i32*, i32*, ...)*), %struct.anon.27* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..50 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.27* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.27*, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -9190,92 +9853,109 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_28:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store %struct.anon.27* [[__CONTEXT]], %struct.anon.27** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.27*, %struct.anon.27** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_27:%.*]], %struct.anon.27* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], %struct.anon.27* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], %struct.anon.27* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32**, i32*** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], %struct.anon.27* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], %struct.anon.27* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !98 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !98 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !98 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !98 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !98 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !98 -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !98 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4, !llvm.access.group !98 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !98 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**, i64)* @.omp_outlined..51 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]], i64 [[TMP24]]), !llvm.access.group !98 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !98 +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !98 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !98 +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !98 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], %struct.anon.28* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP29]], align 8, !llvm.access.group !98 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], %struct.anon.28* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP30]], align 8, !llvm.access.group !98 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], %struct.anon.28* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP4]], i32** [[TMP31]], align 8, !llvm.access.group !98 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], %struct.anon.28* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32** [[TMP6]], i32*** [[TMP32]], align 8, !llvm.access.group !98 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], %struct.anon.28* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32** [[TMP8]], i32*** [[TMP33]], align 8, !llvm.access.group !98 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], %struct.anon.28* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store i32** [[TMP10]], i32*** [[TMP34]], align 8, !llvm.access.group !98 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], %struct.anon.28* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !98 +// CHECK9-NEXT: store i32 [[TMP36]], i32* [[TMP35]], align 8, !llvm.access.group !98 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.28*)* @.omp_outlined..51 to void (i32*, i32*, ...)*), %struct.anon.28* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !98 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !98 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !98 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !98 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !98 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !98 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP99:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK9-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP40]]) +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK9-NEXT: br i1 [[TMP42]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP43]], 0 // CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -9288,17 +9968,12 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..51 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.28* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.28*, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -9308,110 +9983,115 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.28* [[__CONTEXT]], %struct.anon.28** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.28*, %struct.anon.28** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_28:%.*]], %struct.anon.28* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], %struct.anon.28* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], %struct.anon.28* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], %struct.anon.28* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], %struct.anon.28* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], %struct.anon.28* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32**, i32*** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], %struct.anon.28* [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 8 +// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP9]] to i32 -// CHECK9-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 +// CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP16]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !101 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !101 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !101 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !101 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP30]], [[TMP31]] +// CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !101 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !101 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !101 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32*, i32** [[TMP2]], align 8, !llvm.access.group !101 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !101 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP22]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !101 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !101 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !101 -// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP26]] to i64 -// CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[TMP25]], i64 [[IDXPROM8]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4, !llvm.access.group !101 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP27]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !101 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !101 -// CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[TMP28]], i64 [[IDXPROM11]] -// CHECK9-NEXT: store i32 [[ADD10]], i32* [[ARRAYIDX12]], align 4, !llvm.access.group !101 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !101 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32*, i32** [[TMP10]], align 8, !llvm.access.group !101 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !101 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP33]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !101 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32*, i32** [[TMP12]], align 8, !llvm.access.group !101 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !101 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP36]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4, !llvm.access.group !101 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP35]], [[TMP38]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32*, i32** [[TMP8]], align 8, !llvm.access.group !101 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !101 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP40]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[TMP39]], i64 [[IDXPROM10]] +// CHECK9-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX11]], align 4, !llvm.access.group !101 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !101 -// CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP30]], 1 -// CHECK9-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !101 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !101 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 +// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !101 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP102:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 +// CHECK9-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP33]], 0 -// CHECK9-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 -// CHECK9-NEXT: [[MUL16:%.*]] = mul nsw i32 [[DIV15]], 1 -// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL16]] -// CHECK9-NEXT: store i32 [[ADD17]], i32* [[I6]], align 4 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 +// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] +// CHECK9-NEXT: store i32 [[ADD16]], i32* [[I5]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -9940,23 +10620,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double*, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double*, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store double* [[A]], double** [[A_ADDR]], align 4 // CHECK11-NEXT: store double* [[B]], double** [[B_ADDR]], align 4 // CHECK11-NEXT: store double* [[C]], double** [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[N_ADDR]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store double** [[A_ADDR]], double*** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store double** [[B_ADDR]], double*** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store double** [[C_ADDR]], double*** [[TMP3]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9966,81 +10652,100 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load double**, double*** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !18 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP24]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP25]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store double** [[TMP4]], double*** [[TMP27]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store double** [[TMP6]], double*** [[TMP28]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store double** [[TMP8]], double*** [[TMP29]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !18 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -10053,16 +10758,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10075,99 +10775,103 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP10]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[TMP32:%.*]] = load double*, double** [[TMP12]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load double*, double** [[TMP8]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !22 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK11-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK11-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -10186,23 +10890,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double*, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double*, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store double* [[A]], double** [[A_ADDR]], align 4 // CHECK11-NEXT: store double* [[B]], double** [[B_ADDR]], align 4 // CHECK11-NEXT: store double* [[C]], double** [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store double** [[A_ADDR]], double*** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store double** [[B_ADDR]], double*** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store double** [[C_ADDR]], double*** [[TMP3]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10212,81 +10922,100 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load double**, double*** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !27 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !27 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !27 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !27 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP24]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP25]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store double** [[TMP4]], double*** [[TMP27]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store double** [[TMP6]], double*** [[TMP28]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store double** [[TMP8]], double*** [[TMP29]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !27 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !27 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -10299,16 +11028,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10321,99 +11045,103 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !30 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !30 -// CHECK11-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !30 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !30 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !30 -// CHECK11-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !30 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !30 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !30 -// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !30 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !30 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP10]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP32:%.*]] = load double*, double** [[TMP12]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load double*, double** [[TMP8]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !30 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK11-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK11-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -10433,25 +11161,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double*, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double*, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], i32* [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store double* [[A]], double** [[A_ADDR]], align 4 // CHECK11-NEXT: store double* [[B]], double** [[B_ADDR]], align 4 // CHECK11-NEXT: store double* [[C]], double** [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[CH_ADDR]], i32* [[N_ADDR]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[CH_ADDR]], i32** [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store double** [[A_ADDR]], double*** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store double** [[B_ADDR]], double*** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store double** [[C_ADDR]], double*** [[TMP4]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10461,108 +11196,127 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK11-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !33 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !33 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !33 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]]), !llvm.access.group !33 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: store i32 [[TMP26]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP27]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP28]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP4]], i32** [[TMP29]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store double** [[TMP6]], double*** [[TMP30]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store double** [[TMP8]], double*** [[TMP31]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store double** [[TMP10]], double*** [[TMP32]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !33 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !33 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !33 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !33 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !33 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !33 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !33 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !33 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !33 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !33 -// CHECK11-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] // CHECK11-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK11: cond.true10: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !33 // CHECK11-NEXT: br label [[COND_END12:%.*]] // CHECK11: cond.false11: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !33 // CHECK11-NEXT: br label [[COND_END12]] // CHECK11: cond.end12: -// CHECK11-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE10]] ], [ [[TMP30]], [[COND_FALSE11]] ] +// CHECK11-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE10]] ], [ [[TMP42]], [[COND_FALSE11]] ] // CHECK11-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !33 -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !33 -// CHECK11-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: store i32 [[TMP43]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP45]]) +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 +// CHECK11-NEXT: br i1 [[TMP47]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP48]], 0 // CHECK11-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1 // CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]] @@ -10575,16 +11329,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10597,99 +11346,103 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !36 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !36 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !36 -// CHECK11-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !36 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !36 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !36 -// CHECK11-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !36 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !36 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !36 -// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !36 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !36 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP10]], align 4, !llvm.access.group !36 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !36 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !36 +// CHECK11-NEXT: [[TMP32:%.*]] = load double*, double** [[TMP12]], align 4, !llvm.access.group !36 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !36 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !36 +// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load double*, double** [[TMP8]], align 4, !llvm.access.group !36 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !36 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !36 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK11-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK11-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -10708,23 +11461,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double*, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double*, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store double* [[A]], double** [[A_ADDR]], align 4 // CHECK11-NEXT: store double* [[B]], double** [[B_ADDR]], align 4 // CHECK11-NEXT: store double* [[C]], double** [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store double** [[A_ADDR]], double*** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store double** [[B_ADDR]], double*** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store double** [[C_ADDR]], double*** [[TMP3]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10734,81 +11493,100 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load double**, double*** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !39 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !39 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP24]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP25]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store double** [[TMP4]], double*** [[TMP27]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store double** [[TMP6]], double*** [[TMP28]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store double** [[TMP8]], double*** [[TMP29]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !39 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !39 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -10821,16 +11599,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10843,99 +11616,103 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !42 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !42 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !42 -// CHECK11-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !42 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !42 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !42 -// CHECK11-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !42 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !42 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !42 -// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !42 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !42 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP10]], align 4, !llvm.access.group !42 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !42 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !42 +// CHECK11-NEXT: [[TMP32:%.*]] = load double*, double** [[TMP12]], align 4, !llvm.access.group !42 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !42 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !42 +// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load double*, double** [[TMP8]], align 4, !llvm.access.group !42 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !42 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !42 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK11-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK11-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -10955,25 +11732,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double*, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double*, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], i32* [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store double* [[A]], double** [[A_ADDR]], align 4 // CHECK11-NEXT: store double* [[B]], double** [[B_ADDR]], align 4 // CHECK11-NEXT: store double* [[C]], double** [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* [[CH_ADDR]], i32* [[N_ADDR]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[CH_ADDR]], i32** [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store double** [[A_ADDR]], double*** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store double** [[B_ADDR]], double*** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store double** [[C_ADDR]], double*** [[TMP4]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -10984,89 +11768,107 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !45 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !45 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !45 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !45 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !45 -// CHECK11-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !45 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !45 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i32 [[TMP22]]), !llvm.access.group !45 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !45 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !45 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !45 +// CHECK11-NEXT: store i32 [[TMP26]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !45 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP27]], align 4, !llvm.access.group !45 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP28]], align 4, !llvm.access.group !45 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP4]], i32** [[TMP29]], align 4, !llvm.access.group !45 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store double** [[TMP6]], double*** [[TMP30]], align 4, !llvm.access.group !45 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store double** [[TMP8]], double*** [[TMP31]], align 4, !llvm.access.group !45 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store double** [[TMP10]], double*** [[TMP32]], align 4, !llvm.access.group !45 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !45 +// CHECK11-NEXT: store i32 [[TMP34]], i32* [[TMP33]], align 4, !llvm.access.group !45 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !45 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !45 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !45 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK11-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP29]], 0 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP41]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -11079,17 +11881,12 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -11102,118 +11899,124 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP23]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !48 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !48 +// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !48 -// CHECK11-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !48 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !48 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !48 -// CHECK11-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !48 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !48 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] -// CHECK11-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX8]], align 4, !llvm.access.group !48 -// CHECK11-NEXT: [[ADD9:%.*]] = fadd double [[TMP25]], [[TMP28]] -// CHECK11-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !48 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !48 -// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP34:%.*]] = load double*, double** [[TMP10]], align 4, !llvm.access.group !48 +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !48 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP34]], i32 [[TMP35]] +// CHECK11-NEXT: [[TMP36:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !48 +// CHECK11-NEXT: [[TMP37:%.*]] = load double*, double** [[TMP12]], align 4, !llvm.access.group !48 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !48 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP37]], i32 [[TMP38]] +// CHECK11-NEXT: [[TMP39:%.*]] = load double, double* [[ARRAYIDX8]], align 4, !llvm.access.group !48 +// CHECK11-NEXT: [[ADD9:%.*]] = fadd double [[TMP36]], [[TMP39]] +// CHECK11-NEXT: [[TMP40:%.*]] = load double*, double** [[TMP8]], align 4, !llvm.access.group !48 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !48 +// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP40]], i32 [[TMP41]] // CHECK11-NEXT: store double [[ADD9]], double* [[ARRAYIDX10]], align 4, !llvm.access.group !48 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 -// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP42]], 1 // CHECK11-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] // CHECK11-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] // CHECK11-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP36]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP37]]) -// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 -// CHECK11-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP47:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, i32* [[TMP47]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP48]]) +// CHECK11-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CHECK11-NEXT: br i1 [[TMP50]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP40]], 0 +// CHECK11-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP51]], 0 // CHECK11-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 // CHECK11-NEXT: [[MUL16:%.*]] = mul nsw i32 [[DIV15]], 1 // CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL16]] @@ -11232,23 +12035,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double*, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double*, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store double* [[A]], double** [[A_ADDR]], align 4 // CHECK11-NEXT: store double* [[B]], double** [[B_ADDR]], align 4 // CHECK11-NEXT: store double* [[C]], double** [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store double** [[A_ADDR]], double*** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store double** [[B_ADDR]], double*** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store double** [[C_ADDR]], double*** [[TMP3]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11258,81 +12067,100 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load double**, double*** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !51 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !51 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP24]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP25]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store double** [[TMP4]], double*** [[TMP27]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store double** [[TMP6]], double*** [[TMP28]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store double** [[TMP8]], double*** [[TMP29]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !51 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !51 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -11345,16 +12173,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11367,82 +12190,86 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP24]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !54 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !54 -// CHECK11-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !54 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !54 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP21]], i32 [[TMP22]] -// CHECK11-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !54 -// CHECK11-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !54 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !54 -// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX5]], align 4, !llvm.access.group !54 -// CHECK11-NEXT: [[ADD6:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !54 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !54 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[TMP27]], i32 [[TMP28]] +// CHECK11-NEXT: [[TMP30:%.*]] = load double*, double** [[TMP10]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP30]], i32 [[TMP31]] +// CHECK11-NEXT: [[TMP32:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: [[TMP33:%.*]] = load double*, double** [[TMP12]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[TMP33]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load double, double* [[ARRAYIDX5]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: [[ADD6:%.*]] = fadd double [[TMP32]], [[TMP35]] +// CHECK11-NEXT: [[TMP36:%.*]] = load double*, double** [[TMP8]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[TMP36]], i32 [[TMP37]] // CHECK11-NEXT: store double [[ADD6]], double* [[ARRAYIDX7]], align 4, !llvm.access.group !54 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP38]], 1 // CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK11: omp.inner.for.end: @@ -11450,12 +12277,12 @@ // CHECK11: omp.dispatch.inc: // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK11-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP41]], 0 // CHECK11-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 // CHECK11-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 // CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] @@ -11475,25 +12302,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double*, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double*, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], i32* [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store double* [[A]], double** [[A_ADDR]], align 4 // CHECK11-NEXT: store double* [[B]], double** [[B_ADDR]], align 4 // CHECK11-NEXT: store double* [[C]], double** [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, double**, double**, double**)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32* [[CH_ADDR]], i32* [[N_ADDR]], double** [[A_ADDR]], double** [[B_ADDR]], double** [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[CH_ADDR]], i32** [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store double** [[A_ADDR]], double*** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store double** [[B_ADDR]], double*** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store double** [[C_ADDR]], double*** [[TMP4]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.11*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -11504,89 +12338,107 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], %struct.anon.11* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load double**, double*** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !57 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !57 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !57 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !57 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !57 -// CHECK11-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !57 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !57 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**, i32)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i32 [[TMP22]]), !llvm.access.group !57 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !57 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !57 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !57 +// CHECK11-NEXT: store i32 [[TMP26]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !57 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP27]], align 4, !llvm.access.group !57 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP28]], align 4, !llvm.access.group !57 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP4]], i32** [[TMP29]], align 4, !llvm.access.group !57 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store double** [[TMP6]], double*** [[TMP30]], align 4, !llvm.access.group !57 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store double** [[TMP8]], double*** [[TMP31]], align 4, !llvm.access.group !57 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store double** [[TMP10]], double*** [[TMP32]], align 4, !llvm.access.group !57 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !57 +// CHECK11-NEXT: store i32 [[TMP34]], i32* [[TMP33]], align 4, !llvm.access.group !57 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.12*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !57 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !57 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !57 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK11-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP29]], 0 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP41]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -11599,17 +12451,12 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], double** noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca double**, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -11622,84 +12469,90 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store double** [[A]], double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store double** [[B]], double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store double** [[C]], double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load double**, double*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load double**, double*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load double**, double*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load double**, double*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load double**, double*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load double**, double*** [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP16]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP29]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !60 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !60 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !60 -// CHECK11-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !60 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !60 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP22]], i32 [[TMP23]] -// CHECK11-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !60 -// CHECK11-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !60 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !60 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP25]], i32 [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !60 -// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP24]], [[TMP27]] -// CHECK11-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !60 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !60 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP28]], i32 [[TMP29]] +// CHECK11-NEXT: [[TMP33:%.*]] = load double*, double** [[TMP10]], align 4, !llvm.access.group !60 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !60 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP33]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !60 +// CHECK11-NEXT: [[TMP36:%.*]] = load double*, double** [[TMP12]], align 4, !llvm.access.group !60 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !60 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP36]], i32 [[TMP37]] +// CHECK11-NEXT: [[TMP38:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !60 +// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP35]], [[TMP38]] +// CHECK11-NEXT: [[TMP39:%.*]] = load double*, double** [[TMP8]], align 4, !llvm.access.group !60 +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !60 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP39]], i32 [[TMP40]] // CHECK11-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !60 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP61:![0-9]+]] // CHECK11: omp.inner.for.end: @@ -11707,12 +12560,12 @@ // CHECK11: omp.dispatch.inc: // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 +// CHECK11-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP44]], 0 // CHECK11-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -12235,23 +13088,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: store i32* [[B]], i32** [[B_ADDR]], align 4 // CHECK11-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32**, i32**, i32**)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32** [[A_ADDR]], i32** [[B_ADDR]], i32** [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32** [[A_ADDR]], i32*** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32** [[B_ADDR]], i32*** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32** [[C_ADDR]], i32*** [[TMP3]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.13*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12261,81 +13120,100 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32**, i32*** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !63 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !63 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !63 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !63 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]), !llvm.access.group !63 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !63 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !63 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !63 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !63 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP24]], align 4, !llvm.access.group !63 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP25]], align 4, !llvm.access.group !63 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 4, !llvm.access.group !63 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32** [[TMP4]], i32*** [[TMP27]], align 4, !llvm.access.group !63 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32** [[TMP6]], i32*** [[TMP28]], align 4, !llvm.access.group !63 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32** [[TMP8]], i32*** [[TMP29]], align 4, !llvm.access.group !63 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..27 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !63 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !63 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !63 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP64:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -12348,16 +13226,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12370,99 +13243,103 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32**, i32*** [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !66 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !66 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !66 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !66 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !66 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !66 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !66 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 4, !llvm.access.group !66 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !66 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !66 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 4, !llvm.access.group !66 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !66 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, !llvm.access.group !66 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 4, !llvm.access.group !66 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !66 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP10]], align 4, !llvm.access.group !66 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !66 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !66 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[TMP12]], align 4, !llvm.access.group !66 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !66 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, !llvm.access.group !66 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32*, i32** [[TMP8]], align 4, !llvm.access.group !66 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !66 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX8]], align 4, !llvm.access.group !66 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !66 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !66 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !66 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP67:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK11-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK11-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -12481,23 +13358,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: store i32* [[B]], i32** [[B_ADDR]], align 4 // CHECK11-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32**, i32**, i32**)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32** [[A_ADDR]], i32** [[B_ADDR]], i32** [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32** [[A_ADDR]], i32*** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32** [[B_ADDR]], i32*** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32** [[C_ADDR]], i32*** [[TMP3]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.15*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.15* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.15*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12507,81 +13390,100 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.15* [[__CONTEXT]], %struct.anon.15** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.15*, %struct.anon.15** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], %struct.anon.15* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32**, i32*** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !69 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !69 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !69 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !69 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !69 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !69 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]), !llvm.access.group !69 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !69 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !69 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !69 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !69 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP24]], align 4, !llvm.access.group !69 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP25]], align 4, !llvm.access.group !69 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 4, !llvm.access.group !69 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32** [[TMP4]], i32*** [[TMP27]], align 4, !llvm.access.group !69 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32** [[TMP6]], i32*** [[TMP28]], align 4, !llvm.access.group !69 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32** [[TMP8]], i32*** [[TMP29]], align 4, !llvm.access.group !69 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.16*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !69 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !69 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !69 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !69 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !69 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !69 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP70:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -12594,16 +13496,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.16* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.16*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12616,99 +13513,103 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.16* [[__CONTEXT]], %struct.anon.16** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], %struct.anon.16* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32**, i32*** [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !72 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !72 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !72 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !72 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !72 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !72 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !72 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 4, !llvm.access.group !72 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !72 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !72 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 4, !llvm.access.group !72 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !72 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, !llvm.access.group !72 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 4, !llvm.access.group !72 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !72 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP10]], align 4, !llvm.access.group !72 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !72 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !72 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[TMP12]], align 4, !llvm.access.group !72 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !72 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, !llvm.access.group !72 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32*, i32** [[TMP8]], align 4, !llvm.access.group !72 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !72 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX8]], align 4, !llvm.access.group !72 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !72 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !72 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !72 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP73:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK11-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK11-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -12728,25 +13629,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], i32* [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: store i32* [[B]], i32** [[B_ADDR]], align 4 // CHECK11-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32**, i32**, i32**)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* [[CH_ADDR]], i32* [[N_ADDR]], i32** [[A_ADDR]], i32** [[B_ADDR]], i32** [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[CH_ADDR]], i32** [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32** [[A_ADDR]], i32*** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32** [[B_ADDR]], i32*** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32** [[C_ADDR]], i32*** [[TMP4]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.17*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.17* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.17*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12756,108 +13664,127 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK11-NEXT: store %struct.anon.17* [[__CONTEXT]], %struct.anon.17** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.17*, %struct.anon.17** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], %struct.anon.17* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32**, i32*** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !75 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !75 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !75 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !75 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]]), !llvm.access.group !75 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: store i32 [[TMP26]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP27]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP28]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP4]], i32** [[TMP29]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32** [[TMP6]], i32*** [[TMP30]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32** [[TMP8]], i32*** [[TMP31]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32** [[TMP10]], i32*** [[TMP32]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.18*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !75 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !75 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !75 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !75 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !75 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !75 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !75 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !75 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !75 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !75 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !75 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !75 -// CHECK11-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] // CHECK11-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK11: cond.true10: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !75 // CHECK11-NEXT: br label [[COND_END12:%.*]] // CHECK11: cond.false11: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !75 // CHECK11-NEXT: br label [[COND_END12]] // CHECK11: cond.end12: -// CHECK11-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE10]] ], [ [[TMP30]], [[COND_FALSE11]] ] +// CHECK11-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE10]] ], [ [[TMP42]], [[COND_FALSE11]] ] // CHECK11-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !75 -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !75 -// CHECK11-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: store i32 [[TMP43]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !75 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP76:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP45]]) +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 +// CHECK11-NEXT: br i1 [[TMP47]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP48]], 0 // CHECK11-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1 // CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]] @@ -12870,16 +13797,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.18* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.18*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12892,99 +13814,103 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.18* [[__CONTEXT]], %struct.anon.18** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.18*, %struct.anon.18** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], %struct.anon.18* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32**, i32*** [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !78 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !78 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !78 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !78 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !78 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !78 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !78 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 4, !llvm.access.group !78 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !78 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !78 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 4, !llvm.access.group !78 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !78 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, !llvm.access.group !78 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 4, !llvm.access.group !78 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !78 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP10]], align 4, !llvm.access.group !78 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !78 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !78 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[TMP12]], align 4, !llvm.access.group !78 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !78 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, !llvm.access.group !78 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32*, i32** [[TMP8]], align 4, !llvm.access.group !78 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !78 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX8]], align 4, !llvm.access.group !78 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !78 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !78 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !78 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP79:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK11-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK11-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -13003,23 +13929,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_19:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: store i32* [[B]], i32** [[B_ADDR]], align 4 // CHECK11-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32**, i32**, i32**)* @.omp_outlined..38 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32** [[A_ADDR]], i32** [[B_ADDR]], i32** [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32** [[A_ADDR]], i32*** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32** [[B_ADDR]], i32*** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32** [[C_ADDR]], i32*** [[TMP3]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.19*)* @.omp_outlined..38 to void (i32*, i32*, ...)*), %struct.anon.19* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..38 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.19* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.19*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13029,81 +13961,100 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.19* [[__CONTEXT]], %struct.anon.19** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.19*, %struct.anon.19** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_19:%.*]], %struct.anon.19* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32**, i32*** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !81 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !81 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !81 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !81 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !81 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !81 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]), !llvm.access.group !81 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !81 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !81 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !81 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !81 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP24]], align 4, !llvm.access.group !81 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP25]], align 4, !llvm.access.group !81 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 4, !llvm.access.group !81 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32** [[TMP4]], i32*** [[TMP27]], align 4, !llvm.access.group !81 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32** [[TMP6]], i32*** [[TMP28]], align 4, !llvm.access.group !81 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32** [[TMP8]], i32*** [[TMP29]], align 4, !llvm.access.group !81 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.20*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !81 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !81 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !81 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !81 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !81 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !81 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP82:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -13116,16 +14067,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..39 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.20* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.20*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13138,99 +14084,103 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.20* [[__CONTEXT]], %struct.anon.20** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.20*, %struct.anon.20** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], %struct.anon.20* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32**, i32*** [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !84 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !84 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !84 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !84 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !84 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !84 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !84 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 4, !llvm.access.group !84 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !84 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !84 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 4, !llvm.access.group !84 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !84 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, !llvm.access.group !84 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 4, !llvm.access.group !84 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !84 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP10]], align 4, !llvm.access.group !84 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !84 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !84 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[TMP12]], align 4, !llvm.access.group !84 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !84 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, !llvm.access.group !84 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32*, i32** [[TMP8]], align 4, !llvm.access.group !84 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !84 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX8]], align 4, !llvm.access.group !84 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !84 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !84 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !84 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP85:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK11-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK11-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -13250,25 +14200,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], i32* [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: store i32* [[B]], i32** [[B_ADDR]], align 4 // CHECK11-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32**, i32**, i32**)* @.omp_outlined..42 to void (i32*, i32*, ...)*), i32* [[CH_ADDR]], i32* [[N_ADDR]], i32** [[A_ADDR]], i32** [[B_ADDR]], i32** [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[CH_ADDR]], i32** [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32** [[A_ADDR]], i32*** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32** [[B_ADDR]], i32*** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32** [[C_ADDR]], i32*** [[TMP4]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.21*)* @.omp_outlined..42 to void (i32*, i32*, ...)*), %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..42 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.21* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.21*, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -13279,89 +14236,107 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store %struct.anon.21* [[__CONTEXT]], %struct.anon.21** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.21*, %struct.anon.21** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], %struct.anon.21* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32**, i32*** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !87 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !87 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !87 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !87 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !87 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !87 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !87 -// CHECK11-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !87 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !87 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**, i32)* @.omp_outlined..43 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]], i32 [[TMP22]]), !llvm.access.group !87 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !87 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !87 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !87 +// CHECK11-NEXT: store i32 [[TMP26]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !87 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP27]], align 4, !llvm.access.group !87 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP28]], align 4, !llvm.access.group !87 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP4]], i32** [[TMP29]], align 4, !llvm.access.group !87 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32** [[TMP6]], i32*** [[TMP30]], align 4, !llvm.access.group !87 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32** [[TMP8]], i32*** [[TMP31]], align 4, !llvm.access.group !87 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32** [[TMP10]], i32*** [[TMP32]], align 4, !llvm.access.group !87 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !87 +// CHECK11-NEXT: store i32 [[TMP34]], i32* [[TMP33]], align 4, !llvm.access.group !87 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.22*)* @.omp_outlined..43 to void (i32*, i32*, ...)*), %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !87 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !87 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !87 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !87 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !87 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !87 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP88:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK11-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP29]], 0 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP41]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -13374,17 +14349,12 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..43 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.22* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.22*, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -13397,118 +14367,124 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.22* [[__CONTEXT]], %struct.anon.22** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.22*, %struct.anon.22** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], %struct.anon.22* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32**, i32*** [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP23]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !90 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !90 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !90 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !90 +// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !90 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !90 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !90 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP2]], align 4, !llvm.access.group !90 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !90 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !90 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP3]], align 4, !llvm.access.group !90 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !90 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i32 [[TMP27]] -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4, !llvm.access.group !90 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] -// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP1]], align 4, !llvm.access.group !90 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !90 -// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32*, i32** [[TMP10]], align 4, !llvm.access.group !90 +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !90 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP34]], i32 [[TMP35]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !90 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32*, i32** [[TMP12]], align 4, !llvm.access.group !90 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !90 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP37]], i32 [[TMP38]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4, !llvm.access.group !90 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP36]], [[TMP39]] +// CHECK11-NEXT: [[TMP40:%.*]] = load i32*, i32** [[TMP8]], align 4, !llvm.access.group !90 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !90 +// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[TMP40]], i32 [[TMP41]] // CHECK11-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX10]], align 4, !llvm.access.group !90 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !90 -// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !90 +// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP42]], 1 // CHECK11-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !90 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP91:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] // CHECK11-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] // CHECK11-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP36]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP37]]) -// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 -// CHECK11-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP47:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, i32* [[TMP47]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP48]]) +// CHECK11-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CHECK11-NEXT: br i1 [[TMP50]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP40]], 0 +// CHECK11-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP51]], 0 // CHECK11-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 // CHECK11-NEXT: [[MUL16:%.*]] = mul nsw i32 [[DIV15]], 1 // CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL16]] @@ -13527,23 +14503,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_23:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: store i32* [[B]], i32** [[B_ADDR]], align 4 // CHECK11-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32**, i32**, i32**)* @.omp_outlined..46 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32** [[A_ADDR]], i32** [[B_ADDR]], i32** [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32** [[A_ADDR]], i32*** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32** [[B_ADDR]], i32*** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32** [[C_ADDR]], i32*** [[TMP3]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.23*)* @.omp_outlined..46 to void (i32*, i32*, ...)*), %struct.anon.23* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..46 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.23* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.23*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13553,81 +14535,100 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.23* [[__CONTEXT]], %struct.anon.23** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.23*, %struct.anon.23** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_23:%.*]], %struct.anon.23* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32**, i32*** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !93 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !93 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !93 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !93 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !93 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !93 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**)* @.omp_outlined..47 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]), !llvm.access.group !93 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !93 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !93 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !93 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !93 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP24]], align 4, !llvm.access.group !93 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP25]], align 4, !llvm.access.group !93 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 4, !llvm.access.group !93 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32** [[TMP4]], i32*** [[TMP27]], align 4, !llvm.access.group !93 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32** [[TMP6]], i32*** [[TMP28]], align 4, !llvm.access.group !93 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32** [[TMP8]], i32*** [[TMP29]], align 4, !llvm.access.group !93 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.24*)* @.omp_outlined..47 to void (i32*, i32*, ...)*), %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !93 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !93 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !93 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !93 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !93 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !93 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP94:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -13640,16 +14641,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..47 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.24* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.24*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13662,82 +14658,86 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.24* [[__CONTEXT]], %struct.anon.24** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.24*, %struct.anon.24** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_24:%.*]], %struct.anon.24* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32**, i32*** [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP24]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !96 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !96 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !96 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !96 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !96 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !96 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !96 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[TMP2]], align 4, !llvm.access.group !96 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !96 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP21]], i32 [[TMP22]] -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !96 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP3]], align 4, !llvm.access.group !96 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !96 -// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[TMP24]], i32 [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4, !llvm.access.group !96 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP1]], align 4, !llvm.access.group !96 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !96 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[TMP27]], i32 [[TMP28]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32*, i32** [[TMP10]], align 4, !llvm.access.group !96 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !96 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP30]], i32 [[TMP31]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !96 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32*, i32** [[TMP12]], align 4, !llvm.access.group !96 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !96 +// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[TMP33]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4, !llvm.access.group !96 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], [[TMP35]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32*, i32** [[TMP8]], align 4, !llvm.access.group !96 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !96 +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[TMP36]], i32 [[TMP37]] // CHECK11-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX7]], align 4, !llvm.access.group !96 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !96 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !96 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP38]], 1 // CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !96 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP97:![0-9]+]] // CHECK11: omp.inner.for.end: @@ -13745,12 +14745,12 @@ // CHECK11: omp.dispatch.inc: // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK11-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP41]], 0 // CHECK11-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 // CHECK11-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 // CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] @@ -13770,25 +14770,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_25:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], i32* [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: store i32* [[B]], i32** [[B_ADDR]], align 4 // CHECK11-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32**, i32**, i32**)* @.omp_outlined..50 to void (i32*, i32*, ...)*), i32* [[CH_ADDR]], i32* [[N_ADDR]], i32** [[A_ADDR]], i32** [[B_ADDR]], i32** [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[CH_ADDR]], i32** [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32** [[A_ADDR]], i32*** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32** [[B_ADDR]], i32*** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32** [[C_ADDR]], i32*** [[TMP4]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.25*)* @.omp_outlined..50 to void (i32*, i32*, ...)*), %struct.anon.25* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..50 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.25* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.25*, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -13799,89 +14806,107 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_26:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[CH]], i32** [[CH_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[CH_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store %struct.anon.25* [[__CONTEXT]], %struct.anon.25** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.25*, %struct.anon.25** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_25:%.*]], %struct.anon.25* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32**, i32*** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !99 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !99 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !99 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !99 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !99 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !99 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !99 -// CHECK11-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !99 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !99 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**, i32)* @.omp_outlined..51 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]], i32 [[TMP22]]), !llvm.access.group !99 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !99 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !99 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !99 +// CHECK11-NEXT: store i32 [[TMP26]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !99 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP27]], align 4, !llvm.access.group !99 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP28]], align 4, !llvm.access.group !99 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP4]], i32** [[TMP29]], align 4, !llvm.access.group !99 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32** [[TMP6]], i32*** [[TMP30]], align 4, !llvm.access.group !99 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32** [[TMP8]], i32*** [[TMP31]], align 4, !llvm.access.group !99 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32** [[TMP10]], i32*** [[TMP32]], align 4, !llvm.access.group !99 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !99 +// CHECK11-NEXT: store i32 [[TMP34]], i32* [[TMP33]], align 4, !llvm.access.group !99 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.26*)* @.omp_outlined..51 to void (i32*, i32*, ...)*), %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !99 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !99 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !99 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !99 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !99 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !99 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP100:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK11-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP29]], 0 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP41]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -13894,17 +14919,12 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..51 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.26* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.26*, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -13917,84 +14937,90 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32**, i32*** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.26* [[__CONTEXT]], %struct.anon.26** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.26*, %struct.anon.26** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_26:%.*]], %struct.anon.26* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32**, i32*** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32**, i32*** [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP16]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP29]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !102 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !102 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !102 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !102 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !102 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !102 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !102 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32*, i32** [[TMP2]], align 4, !llvm.access.group !102 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !102 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP22]], i32 [[TMP23]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !102 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32*, i32** [[TMP3]], align 4, !llvm.access.group !102 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !102 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[TMP25]], i32 [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, !llvm.access.group !102 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], [[TMP27]] -// CHECK11-NEXT: [[TMP28:%.*]] = load i32*, i32** [[TMP1]], align 4, !llvm.access.group !102 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !102 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP28]], i32 [[TMP29]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32*, i32** [[TMP10]], align 4, !llvm.access.group !102 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !102 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP33]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !102 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32*, i32** [[TMP12]], align 4, !llvm.access.group !102 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !102 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[TMP36]], i32 [[TMP37]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, !llvm.access.group !102 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP38]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32*, i32** [[TMP8]], align 4, !llvm.access.group !102 +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !102 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP39]], i32 [[TMP40]] // CHECK11-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX8]], align 4, !llvm.access.group !102 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !102 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !102 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !102 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP103:![0-9]+]] // CHECK11: omp.inner.for.end: @@ -14002,12 +15028,12 @@ // CHECK11: omp.dispatch.inc: // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 +// CHECK11-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP44]], 0 // CHECK11-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp @@ -288,6 +288,7 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], i64* [[SVAR_ADDR]], align 8 @@ -297,20 +298,25 @@ // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SFVAR_ADDR]] to float* // CHECK1-NEXT: store double* [[CONV1]], double** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, double*, double*, i32*, float*)* @.omp_outlined. to void (i32*, i32*, ...)*), double* [[CONV]], double* [[TMP0]], i32* [[CONV2]], float* [[CONV3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store double* [[CONV]], double** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[TMP2]], double** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[CONV2]], i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[CONV3]], float** [[TMP4]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[G:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -319,102 +325,105 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store double* [[G]], double** [[G_ADDR]], align 8 -// CHECK1-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store float* [[SFVAR]], float** [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float*, float** [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store double* [[TMP1]], double** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: store double* [[TMP4]], double** [[_TMP1]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 8 +// CHECK1-NEXT: store double* [[TMP4]], double** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[TMP9]], double** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load double, double* [[TMP0]], align 8 -// CHECK1-NEXT: store double [[TMP5]], double* [[G3]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load double, double* [[TMP6]], align 8 -// CHECK1-NEXT: store double [[TMP7]], double* [[G14]], align 8 -// CHECK1-NEXT: store double* [[G14]], double** [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[SVAR6]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load float, float* [[TMP3]], align 4 -// CHECK1-NEXT: store float [[TMP9]], float* [[SFVAR7]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load double, double* [[TMP2]], align 8 +// CHECK1-NEXT: store double [[TMP10]], double* [[G]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load double*, double** [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load double, double* [[TMP11]], align 8 +// CHECK1-NEXT: store double [[TMP12]], double* [[G1]], align 8 +// CHECK1-NEXT: store double* [[G1]], double** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load float, float* [[TMP8]], align 4 +// CHECK1-NEXT: store float [[TMP14]], float* [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = load double, double* [[G3]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[G_CASTED]] to double* -// CHECK1-NEXT: store double [[TMP21]], double* [[CONV]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i64, i64* [[G_CASTED]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP23:%.*]] = load double*, double** [[_TMP5]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP24:%.*]] = load volatile double, double* [[TMP23]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[G1_CASTED]] to double* -// CHECK1-NEXT: store double [[TMP24]], double* [[CONV9]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, i64* [[G1_CASTED]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[SVAR6]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[CONV10:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP26]], i32* [[CONV10]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP27:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP28:%.*]] = load float, float* [[SFVAR7]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[CONV11:%.*]] = bitcast i64* [[SFVAR_CASTED]] to float* -// CHECK1-NEXT: store float [[TMP28]], float* [[CONV11]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP25]], i64 [[TMP27]], i64 [[TMP29]]), !llvm.access.group !4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP26]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP27]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP29:%.*]] = load double, double* [[G]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: store double [[TMP29]], double* [[TMP28]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP31:%.*]] = load double*, double** [[_TMP3]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP32:%.*]] = load volatile double, double* [[TMP31]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: store double [[TMP32]], double* [[TMP30]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[SVAR]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: store i32 [[TMP34]], i32* [[TMP33]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP36:%.*]] = load float, float* [[SFVAR]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: store float [[TMP36]], float* [[TMP35]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK1-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK1-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -423,105 +432,112 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SVAR:%.*]], i64 noundef [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SVAR]], i64* [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SFVAR]], i64* [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[G_ADDR]] to double* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to double* -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SFVAR_ADDR]] to float* -// CHECK1-NEXT: store double* [[CONV1]], double** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load double, double* [[TMP5]], align 8 +// CHECK1-NEXT: store double [[TMP6]], double* [[G]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double, double* [[TMP7]], align 8 +// CHECK1-NEXT: store double [[TMP8]], double* [[G1]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load float, float* [[TMP11]], align 4 +// CHECK1-NEXT: store float [[TMP12]], float* [[SFVAR]], align 4 +// CHECK1-NEXT: store double* [[G1]], double** [[TMP]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: store double 1.000000e+00, double* [[CONV]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP10:%.*]] = load double*, double** [[TMP]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP10]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: store i32 3, i32* [[CONV2]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: store float 4.000000e+00, float* [[CONV3]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double* [[CONV]], double** [[TMP11]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load double*, double** [[TMP]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: store double* [[TMP13]], double** [[TMP12]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store float* [[CONV3]], float** [[TMP15]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !8 +// CHECK1-NEXT: store double 1.000000e+00, double* [[G]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP23]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: store i32 3, i32* [[SVAR]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: store float 4.000000e+00, float* [[SFVAR]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store double* [[G]], double** [[TMP24]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: store double* [[TMP26]], double** [[TMP25]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[SVAR]], i32** [[TMP27]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[SFVAR]], float** [[TMP28]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK1-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -565,6 +581,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 // CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], i32* [[SVAR_ADDR]], align 4 @@ -579,20 +596,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load volatile double, double* [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], double* [[G13]], align 8 // CHECK3-NEXT: store double* [[G13]], double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, double*, double*, i32*, float*)* @.omp_outlined. to void (i32*, i32*, ...)*), double* [[G2]], double* [[TMP5]], i32* [[SVAR_ADDR]], float* [[CONV]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G2]], double** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load double*, double** [[_TMP4]], align 4 +// CHECK3-NEXT: store double* [[TMP7]], double** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SVAR_ADDR]], i32** [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[CONV]], float** [[TMP9]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca float*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -601,94 +623,102 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP5:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP3:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G1_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 -// CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store float* [[SFVAR]], float** [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load float*, float** [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store double* [[TMP1]], double** [[TMP]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP]], align 4 -// CHECK3-NEXT: store double* [[TMP4]], double** [[_TMP1]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 4 +// CHECK3-NEXT: store double* [[TMP4]], double** [[TMP]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load double*, double** [[TMP]], align 4 +// CHECK3-NEXT: store double* [[TMP9]], double** [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load double, double* [[TMP0]], align 8 -// CHECK3-NEXT: store double [[TMP5]], double* [[G3]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = load double*, double** [[_TMP1]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load double, double* [[TMP6]], align 4 -// CHECK3-NEXT: store double [[TMP7]], double* [[G14]], align 8 -// CHECK3-NEXT: store double* [[G14]], double** [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[SVAR6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load float, float* [[TMP3]], align 4 -// CHECK3-NEXT: store float [[TMP9]], float* [[SFVAR7]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load double, double* [[TMP2]], align 8 +// CHECK3-NEXT: store double [[TMP10]], double* [[G]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load double*, double** [[_TMP1]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load double, double* [[TMP11]], align 4 +// CHECK3-NEXT: store double [[TMP12]], double* [[G1]], align 8 +// CHECK3-NEXT: store double* [[G1]], double** [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load float, float* [[TMP8]], align 4 +// CHECK3-NEXT: store float [[TMP14]], float* [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP19:%.*]] = load double*, double** [[_TMP5]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP20:%.*]] = load volatile double, double* [[TMP19]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[G1_CASTED]] to double* -// CHECK3-NEXT: store double [[TMP20]], double* [[CONV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[G1_CASTED]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[SVAR6]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: store i32 [[TMP22]], i32* [[SVAR_CASTED]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP24:%.*]] = load float, float* [[SFVAR7]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[CONV9:%.*]] = bitcast i32* [[SFVAR_CASTED]] to float* -// CHECK3-NEXT: store float [[TMP24]], float* [[CONV9]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[SFVAR_CASTED]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, double*, i32, i32, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], double* [[G3]], i32 [[TMP21]], i32 [[TMP23]], i32 [[TMP25]]), !llvm.access.group !5 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP24]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP25]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store double* [[G]], double** [[TMP26]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP28:%.*]] = load double*, double** [[_TMP3]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP29:%.*]] = load volatile double, double* [[TMP28]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store double [[TMP29]], double* [[TMP27]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[SVAR]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store i32 [[TMP31]], i32* [[TMP30]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP33:%.*]] = load float, float* [[SFVAR]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store float [[TMP33]], float* [[TMP32]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !5 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]]) +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK3-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -697,105 +727,111 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G:%.*]], i32 noundef [[G1:%.*]], i32 noundef [[SVAR:%.*]], i32 noundef [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G3:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[G1]], i32* [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SVAR]], i32* [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SFVAR]], i32* [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[G1_ADDR]] to double* -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[SFVAR_ADDR]] to float* -// CHECK3-NEXT: store double* [[CONV]], double** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load double*, double** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double, double* [[TMP7]], align 4 +// CHECK3-NEXT: store double [[TMP8]], double* [[G1]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load float, float* [[TMP11]], align 4 +// CHECK3-NEXT: store float [[TMP12]], float* [[SFVAR]], align 4 +// CHECK3-NEXT: store double* [[G1]], double** [[TMP]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load double, double* [[TMP0]], align 8 -// CHECK3-NEXT: store double [[TMP3]], double* [[G3]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load double, double* [[TMP6]], align 8 +// CHECK3-NEXT: store double [[TMP15]], double* [[G]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: store double 1.000000e+00, double* [[G3]], align 8, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP12:%.*]] = load double*, double** [[TMP]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP12]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: store i32 3, i32* [[SVAR_ADDR]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: store float 4.000000e+00, float* [[CONV1]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double* [[G3]], double** [[TMP13]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP15:%.*]] = load double*, double** [[TMP]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: store double* [[TMP15]], double** [[TMP14]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store i32* [[SVAR_ADDR]], i32** [[TMP16]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store float* [[CONV1]], float** [[TMP17]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !9 +// CHECK3-NEXT: store double 1.000000e+00, double* [[G]], align 8, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP24]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store i32 3, i32* [[SVAR]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store float 4.000000e+00, float* [[SFVAR]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G]], double** [[TMP25]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store double* [[TMP27]], double** [[TMP26]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SVAR]], i32** [[TMP28]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[SFVAR]], float** [[TMP29]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK3-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -987,6 +1023,7 @@ // CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 // CHECK8-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK8-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 +// CHECK8-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK8-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK8-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK8-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 @@ -998,21 +1035,27 @@ // CHECK8-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK8-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK8-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 -// CHECK8-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]], [2 x i32]* [[TMP0]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP3]], i32* [[CONV1]]) +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK8-NEXT: store i32* [[CONV]], i32** [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK8-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP4]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK8-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP5]], align 8 +// CHECK8-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK8-NEXT: store %struct.S* [[TMP7]], %struct.S** [[TMP6]], align 8 +// CHECK8-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK8-NEXT: store i32* [[CONV1]], i32** [[TMP8]], align 8 +// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK8-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK8-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK8-NEXT: entry: // CHECK8-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK8-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK8-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK8-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK8-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK8-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK8-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK8-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 // CHECK8-NEXT: [[_TMP1:%.*]] = alloca %struct.S*, align 8 // CHECK8-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1021,281 +1064,296 @@ // CHECK8-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK8-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK8-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK8-NEXT: [[_TMP8:%.*]] = alloca %struct.S*, align 8 -// CHECK8-NEXT: [[SVAR9:%.*]] = alloca i32, align 4 +// CHECK8-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK8-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK8-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK8-NEXT: [[_TMP4:%.*]] = alloca %struct.S*, align 8 +// CHECK8-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK8-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK8-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK8-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK8-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK8-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK8-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 8 -// CHECK8-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP]], align 8 -// CHECK8-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK8-NEXT: store %struct.S* [[TMP5]], %struct.S** [[_TMP1]], align 8 +// CHECK8-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK8-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK8-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK8-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK8-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK8-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP]], align 8 +// CHECK8-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK8-NEXT: store %struct.S* [[TMP11]], %struct.S** [[_TMP1]], align 8 // CHECK8-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK8-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK8-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK8-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK8-NEXT: store i32 [[TMP6]], i32* [[T_VAR3]], align 4 -// CHECK8-NEXT: [[TMP7:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK8-NEXT: [[TMP8:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP7]], i8* align 4 [[TMP8]], i64 8, i1 false) -// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP9:%.*]] = bitcast [2 x %struct.S]* [[TMP2]] to %struct.S* -// CHECK8-NEXT: [[TMP10:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 -// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP10]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK8-NEXT: store i32 [[TMP12]], i32* [[T_VAR]], align 4 +// CHECK8-NEXT: [[TMP13:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK8-NEXT: [[TMP14:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 8, i1 false) +// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP15:%.*]] = bitcast [2 x %struct.S]* [[TMP6]] to %struct.S* +// CHECK8-NEXT: [[TMP16:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP16]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK8: omp.arraycpy.body: -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP9]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP15]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK8-NEXT: [[TMP11:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK8-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 4, i1 false) +// CHECK8-NEXT: [[TMP17:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK8-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP17]], i8* align 4 [[TMP18]], i64 4, i1 false) // CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP10]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK8: omp.arraycpy.done6: -// CHECK8-NEXT: [[TMP13:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 8 -// CHECK8-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[VAR7]] to i8* -// CHECK8-NEXT: [[TMP15:%.*]] = bitcast %struct.S* [[TMP13]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i64 4, i1 false) -// CHECK8-NEXT: store %struct.S* [[VAR7]], %struct.S** [[_TMP8]], align 8 -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK8-NEXT: store i32 [[TMP16]], i32* [[SVAR9]], align 4 -// CHECK8-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP18]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK8-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP16]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK8: omp.arraycpy.done3: +// CHECK8-NEXT: [[TMP19:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 8 +// CHECK8-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK8-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[TMP19]] to i8* +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) +// CHECK8-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP4]], align 8 +// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK8-NEXT: store i32 [[TMP22]], i32* [[SVAR]], align 4 +// CHECK8-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP24]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK8-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP25]], 1 // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: br label [[COND_END:%.*]] // CHECK8: cond.false: -// CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK8-NEXT: br label [[COND_END]] // CHECK8: cond.end: -// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] +// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP26]], [[COND_FALSE]] ] // CHECK8-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK8-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK8-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK8-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] -// CHECK8-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK8-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK8-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK8-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] +// CHECK8-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 -// CHECK8-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK8-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK8-NEXT: [[TMP27:%.*]] = zext i32 [[TMP26]] to i64 -// CHECK8-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !5 -// CHECK8-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK8-NEXT: store i32 [[TMP28]], i32* [[CONV]], align 4, !llvm.access.group !5 -// CHECK8-NEXT: [[TMP29:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !5 -// CHECK8-NEXT: [[TMP30:%.*]] = load %struct.S*, %struct.S** [[_TMP8]], align 8, !llvm.access.group !5 -// CHECK8-NEXT: [[TMP31:%.*]] = load i32, i32* [[SVAR9]], align 4, !llvm.access.group !5 -// CHECK8-NEXT: [[CONV11:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* -// CHECK8-NEXT: store i32 [[TMP31]], i32* [[CONV11]], align 4, !llvm.access.group !5 -// CHECK8-NEXT: [[TMP32:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8, !llvm.access.group !5 -// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP25]], i64 [[TMP27]], [2 x i32]* [[VEC4]], i64 [[TMP29]], [2 x %struct.S]* [[S_ARR5]], %struct.S* [[TMP30]], i64 [[TMP32]]), !llvm.access.group !5 +// CHECK8-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 +// CHECK8-NEXT: [[TMP31:%.*]] = zext i32 [[TMP30]] to i64 +// CHECK8-NEXT: store i64 [[TMP31]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !5 +// CHECK8-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK8-NEXT: [[TMP33:%.*]] = zext i32 [[TMP32]] to i64 +// CHECK8-NEXT: store i64 [[TMP33]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !5 +// CHECK8-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK8-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP34]], align 8, !llvm.access.group !5 +// CHECK8-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK8-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP35]], align 8, !llvm.access.group !5 +// CHECK8-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK8-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP36]], align 8, !llvm.access.group !5 +// CHECK8-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP38:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !5 +// CHECK8-NEXT: store i32 [[TMP38]], i32* [[TMP37]], align 8, !llvm.access.group !5 +// CHECK8-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK8-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP39]], align 8, !llvm.access.group !5 +// CHECK8-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK8-NEXT: [[TMP41:%.*]] = load %struct.S*, %struct.S** [[_TMP4]], align 8, !llvm.access.group !5 +// CHECK8-NEXT: store %struct.S* [[TMP41]], %struct.S** [[TMP40]], align 8, !llvm.access.group !5 +// CHECK8-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK8-NEXT: [[TMP43:%.*]] = load i32, i32* [[SVAR]], align 4, !llvm.access.group !5 +// CHECK8-NEXT: store i32 [[TMP43]], i32* [[TMP42]], align 8, !llvm.access.group !5 +// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !5 // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK8-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 -// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK8-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK8-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 +// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] // CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: -// CHECK8-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) -// CHECK8-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 -// CHECK8-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK8-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]]) +// CHECK8-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK8-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 +// CHECK8-NEXT: br i1 [[TMP49]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK8: .omp.final.then: // CHECK8-NEXT: store i32 2, i32* [[I]], align 4 // CHECK8-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK8: .omp.final.done: -// CHECK8-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN12]], i64 2 +// CHECK8-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK8-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i64 2 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP39]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP50]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK8-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK8: arraydestroy.done13: +// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK8: arraydestroy.done7: // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK8-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK8-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK8-NEXT: entry: // CHECK8-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK8-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK8-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK8-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK8-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK8-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK8-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 // CHECK8-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK8-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[VEC5:%.*]] = alloca [2 x i32], align 4 -// CHECK8-NEXT: [[S_ARR6:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK8-NEXT: [[VAR8:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK8-NEXT: [[_TMP9:%.*]] = alloca %struct.S*, align 8 +// CHECK8-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK8-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK8-NEXT: [[_TMP4:%.*]] = alloca %struct.S*, align 8 // CHECK8-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK8-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK8-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK8-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK8-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK8-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK8-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK8-NEXT: store i64 [[SVAR]], i64* [[SVAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK8-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK8-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK8-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* -// CHECK8-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 +// CHECK8-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK8-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK8-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 8 +// CHECK8-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK8-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK8-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK8-NEXT: [[TMP10:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP9]], align 8 +// CHECK8-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK8-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[TMP11]], align 8 +// CHECK8-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 7 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 8 +// CHECK8-NEXT: store i32 [[TMP14]], i32* [[SVAR]], align 4 +// CHECK8-NEXT: store %struct.S* [[TMP12]], %struct.S** [[TMP]], align 8 // CHECK8-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK8-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK8-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK8-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK8-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK8-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK8-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK8-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK8-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK8-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP16]] to i32 +// CHECK8-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK8-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK8-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK8-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC5]] to i8* -// CHECK8-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i64 8, i1 false) -// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR6]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK8-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 -// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK8-NEXT: [[TMP17:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK8-NEXT: [[TMP18:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP17]], i8* align 4 [[TMP18]], i64 8, i1 false) +// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP19:%.*]] = bitcast [2 x %struct.S]* [[TMP10]] to %struct.S* +// CHECK8-NEXT: [[TMP20:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP20]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK8: omp.arraycpy.body: -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP19]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK8-NEXT: [[TMP9:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK8-NEXT: [[TMP10:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP9]], i8* align 4 [[TMP10]], i64 4, i1 false) +// CHECK8-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK8-NEXT: [[TMP22:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i64 4, i1 false) // CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] -// CHECK8: omp.arraycpy.done7: -// CHECK8-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK8-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[VAR8]] to i8* -// CHECK8-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[TMP11]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 4, i1 false) -// CHECK8-NEXT: store %struct.S* [[VAR8]], %struct.S** [[_TMP9]], align 8 -// CHECK8-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP15]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP20]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK8: omp.arraycpy.done3: +// CHECK8-NEXT: [[TMP23:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK8-NEXT: [[TMP24:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK8-NEXT: [[TMP25:%.*]] = bitcast %struct.S* [[TMP23]] to i8* +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP24]], i8* align 4 [[TMP25]], i64 4, i1 false) +// CHECK8-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP4]], align 8 +// CHECK8-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP27]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK8-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP28]], 1 // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: br label [[COND_END:%.*]] // CHECK8: cond.false: -// CHECK8-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK8-NEXT: br label [[COND_END]] // CHECK8: cond.end: -// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP29]], [[COND_FALSE]] ] // CHECK8-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK8-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK8-NEXT: store i32 [[TMP30]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK8-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] -// CHECK8-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK8-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] +// CHECK8-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK8-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !9 -// CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC5]], i64 0, i64 [[IDXPROM]] -// CHECK8-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 -// CHECK8-NEXT: [[TMP24:%.*]] = load %struct.S*, %struct.S** [[_TMP9]], align 8, !llvm.access.group !9 -// CHECK8-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK8-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP25]] to i64 -// CHECK8-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR6]], i64 0, i64 [[IDXPROM11]] -// CHECK8-NEXT: [[TMP26:%.*]] = bitcast %struct.S* [[ARRAYIDX12]] to i8* -// CHECK8-NEXT: [[TMP27:%.*]] = bitcast %struct.S* [[TMP24]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 4, i1 false), !llvm.access.group !9 +// CHECK8-NEXT: [[TMP34:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP35:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK8-NEXT: store i32 [[TMP34]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP36:%.*]] = load %struct.S*, %struct.S** [[_TMP4]], align 8, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP37:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK8-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK8-NEXT: [[TMP38:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* +// CHECK8-NEXT: [[TMP39:%.*]] = bitcast %struct.S* [[TMP36]] to i8* +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i64 4, i1 false), !llvm.access.group !9 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK8-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK8-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK8-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: -// CHECK8-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK8-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK8-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK8-NEXT: [[TMP41:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP42:%.*]] = load i32, i32* [[TMP41]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP42]]) +// CHECK8-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK8-NEXT: [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0 +// CHECK8-NEXT: br i1 [[TMP44]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK8: .omp.final.then: // CHECK8-NEXT: store i32 2, i32* [[I]], align 4 // CHECK8-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK8: .omp.final.done: -// CHECK8-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR8]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR6]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN14]], i64 2 +// CHECK8-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK8-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN9]], i64 2 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP33]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP45]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK8-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK8: arraydestroy.done15: +// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK8: arraydestroy.done10: // CHECK8-NEXT: ret void // // @@ -1313,35 +1371,35 @@ // CHECK8-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK8-NEXT: entry: // CHECK8-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK8-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK8-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK8-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK8-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK8-NEXT: [[VAR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK8-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 8 // CHECK8-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 // CHECK8-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x i8*], align 8 // CHECK8-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x i8*], align 8 // CHECK8-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x i8*], align 8 // CHECK8-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK8-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK8-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK8-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) -// CHECK8-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK8-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK8-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK8-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) -// CHECK8-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 8 -// CHECK8-NEXT: [[TMP1:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR]], align 8 -// CHECK8-NEXT: store %struct.S.0* [[TMP1]], %struct.S.0** [[TMP]], align 8 +// CHECK8-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 0 +// CHECK8-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) +// CHECK8-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK8-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) +// CHECK8-NEXT: store %struct.S.1* [[TEST]], %struct.S.1** [[VAR]], align 8 +// CHECK8-NEXT: [[TMP1:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR]], align 8 +// CHECK8-NEXT: store %struct.S.1* [[TMP1]], %struct.S.1** [[TMP]], align 8 // CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_VAR]], align 4 // CHECK8-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK8-NEXT: store i32 [[TMP2]], i32* [[CONV]], align 4 // CHECK8-NEXT: [[TMP3:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK8-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK8-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK8-NEXT: [[TMP6:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK8-NEXT: [[TMP4:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 +// CHECK8-NEXT: [[TMP6:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 // CHECK8-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK8-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64* // CHECK8-NEXT: store i64 [[TMP3]], i64* [[TMP8]], align 8 @@ -1359,19 +1417,19 @@ // CHECK8-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 // CHECK8-NEXT: store i8* null, i8** [[TMP16]], align 8 // CHECK8-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK8-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.0]** -// CHECK8-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP18]], align 8 +// CHECK8-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.1]** +// CHECK8-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP18]], align 8 // CHECK8-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK8-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to [2 x %struct.S.0]** -// CHECK8-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP20]], align 8 +// CHECK8-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to [2 x %struct.S.1]** +// CHECK8-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP20]], align 8 // CHECK8-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 // CHECK8-NEXT: store i8* null, i8** [[TMP21]], align 8 // CHECK8-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK8-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.0** -// CHECK8-NEXT: store %struct.S.0* [[TMP5]], %struct.S.0** [[TMP23]], align 8 +// CHECK8-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.1** +// CHECK8-NEXT: store %struct.S.1* [[TMP5]], %struct.S.1** [[TMP23]], align 8 // CHECK8-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK8-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to %struct.S.0** -// CHECK8-NEXT: store %struct.S.0* [[TMP6]], %struct.S.0** [[TMP25]], align 8 +// CHECK8-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to %struct.S.1** +// CHECK8-NEXT: store %struct.S.1* [[TMP6]], %struct.S.1** [[TMP25]], align 8 // CHECK8-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 // CHECK8-NEXT: store i8* null, i8** [[TMP26]], align 8 // CHECK8-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 @@ -1381,21 +1439,21 @@ // CHECK8-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 // CHECK8-NEXT: br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK8: omp_offload.failed: -// CHECK8-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l48(i64 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP4]]) #[[ATTR4]] +// CHECK8-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l48(i64 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.1]* [[S_ARR]], %struct.S.1* [[TMP4]]) #[[ATTR4]] // CHECK8-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK8: omp_offload.cont: // CHECK8-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK8-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK8-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK8: arraydestroy.done2: -// CHECK8-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK8-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK8-NEXT: [[TMP32:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK8-NEXT: ret i32 [[TMP32]] // @@ -1435,373 +1493,395 @@ // // // CHECK8-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK8-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK8-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK8-NEXT: entry: -// CHECK8-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK8-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK8-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK8-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK8-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK8-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK8-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK8-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK8-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK8-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK8-NEXT: entry: -// CHECK8-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK8-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK8-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK8-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK8-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK8-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK8-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK8-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK8-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) +// CHECK8-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l48 -// CHECK8-SAME: (i64 noundef [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK8-SAME: (i64 noundef [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.1]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { // CHECK8-NEXT: entry: // CHECK8-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 // CHECK8-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK8-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK8-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK8-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.1]*, align 8 +// CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK8-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 8 +// CHECK8-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK8-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK8-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK8-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 +// CHECK8-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[S_ARR_ADDR]], align 8 +// CHECK8-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[VAR_ADDR]], align 8 // CHECK8-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* // CHECK8-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK8-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK8-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK8-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[CONV]], [2 x i32]* [[TMP0]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP3]]) +// CHECK8-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[S_ARR_ADDR]], align 8 +// CHECK8-NEXT: [[TMP2:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR_ADDR]], align 8 +// CHECK8-NEXT: store %struct.S.1* [[TMP2]], %struct.S.1** [[TMP]], align 8 +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK8-NEXT: store i32* [[CONV]], i32** [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK8-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP4]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK8-NEXT: store [2 x %struct.S.1]* [[TMP1]], [2 x %struct.S.1]** [[TMP5]], align 8 +// CHECK8-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP7:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 +// CHECK8-NEXT: store %struct.S.1* [[TMP7]], %struct.S.1** [[TMP6]], align 8 +// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK8-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK8-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK8-NEXT: entry: // CHECK8-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK8-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK8-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK8-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK8-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK8-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 -// CHECK8-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 +// CHECK8-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK8-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 8 +// CHECK8-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 8 // CHECK8-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK8-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK8-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK8-NEXT: [[_TMP8:%.*]] = alloca %struct.S.0*, align 8 +// CHECK8-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK8-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK8-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK8-NEXT: [[_TMP4:%.*]] = alloca %struct.S.1*, align 8 // CHECK8-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK8-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK8-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK8-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK8-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK8-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK8-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 8 -// CHECK8-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK8-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[_TMP1]], align 8 +// CHECK8-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK8-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK8-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP5]], align 8 +// CHECK8-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP7]], align 8 +// CHECK8-NEXT: store %struct.S.1* [[TMP8]], %struct.S.1** [[TMP]], align 8 +// CHECK8-NEXT: [[TMP9:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 +// CHECK8-NEXT: store %struct.S.1* [[TMP9]], %struct.S.1** [[_TMP1]], align 8 // CHECK8-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK8-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK8-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK8-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK8-NEXT: store i32 [[TMP5]], i32* [[T_VAR3]], align 4 -// CHECK8-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK8-NEXT: [[TMP7:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP6]], i8* align 4 [[TMP7]], i64 8, i1 false) -// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP8:%.*]] = bitcast [2 x %struct.S.0]* [[TMP2]] to %struct.S.0* -// CHECK8-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 -// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP9]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK8-NEXT: store i32 [[TMP10]], i32* [[T_VAR]], align 4 +// CHECK8-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK8-NEXT: [[TMP12:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 8, i1 false) +// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S.1]* [[TMP6]] to %struct.S.1* +// CHECK8-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 +// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK8: omp.arraycpy.body: -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP8]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK8-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK8-NEXT: [[TMP10:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK8-NEXT: [[TMP11:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP10]], i8* align 4 [[TMP11]], i64 4, i1 false) -// CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK8: omp.arraycpy.done6: -// CHECK8-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP1]], align 8 -// CHECK8-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[VAR7]] to i8* -// CHECK8-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 4, i1 false) -// CHECK8-NEXT: store %struct.S.0* [[VAR7]], %struct.S.0** [[_TMP8]], align 8 -// CHECK8-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK8-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK8-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK8-NEXT: [[TMP15:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK8-NEXT: [[TMP16:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP15]], i8* align 4 [[TMP16]], i64 4, i1 false) +// CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK8: omp.arraycpy.done3: +// CHECK8-NEXT: [[TMP17:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP1]], align 8 +// CHECK8-NEXT: [[TMP18:%.*]] = bitcast %struct.S.1* [[VAR]] to i8* +// CHECK8-NEXT: [[TMP19:%.*]] = bitcast %struct.S.1* [[TMP17]] to i8* +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false) +// CHECK8-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP4]], align 8 +// CHECK8-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP22]], 1 // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: br label [[COND_END:%.*]] // CHECK8: cond.false: -// CHECK8-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK8-NEXT: br label [[COND_END]] // CHECK8: cond.end: -// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] +// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK8-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK8-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK8-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK8-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 -// CHECK8-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK8-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK8-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK8-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 -// CHECK8-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK8-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 -// CHECK8-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK8-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !14 -// CHECK8-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK8-NEXT: store i32 [[TMP26]], i32* [[CONV]], align 4, !llvm.access.group !14 -// CHECK8-NEXT: [[TMP27:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !14 -// CHECK8-NEXT: [[TMP28:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 8, !llvm.access.group !14 -// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP23]], i64 [[TMP25]], [2 x i32]* [[VEC4]], i64 [[TMP27]], [2 x %struct.S.0]* [[S_ARR5]], %struct.S.0* [[TMP28]]), !llvm.access.group !14 +// CHECK8-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK8-NEXT: store i64 [[TMP28]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !14 +// CHECK8-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: [[TMP30:%.*]] = zext i32 [[TMP29]] to i64 +// CHECK8-NEXT: store i64 [[TMP30]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !14 +// CHECK8-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK8-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP31]], align 8, !llvm.access.group !14 +// CHECK8-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK8-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP32]], align 8, !llvm.access.group !14 +// CHECK8-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK8-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP33]], align 8, !llvm.access.group !14 +// CHECK8-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP35:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: store i32 [[TMP35]], i32* [[TMP34]], align 8, !llvm.access.group !14 +// CHECK8-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK8-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP36]], align 8, !llvm.access.group !14 +// CHECK8-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK8-NEXT: [[TMP38:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP4]], align 8, !llvm.access.group !14 +// CHECK8-NEXT: store %struct.S.1* [[TMP38]], %struct.S.1** [[TMP37]], align 8, !llvm.access.group !14 +// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !14 // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK8-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 -// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK8-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: -// CHECK8-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) -// CHECK8-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 -// CHECK8-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK8-NEXT: [[TMP41:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP42:%.*]] = load i32, i32* [[TMP41]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP42]]) +// CHECK8-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK8-NEXT: [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0 +// CHECK8-NEXT: br i1 [[TMP44]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK8: .omp.final.then: // CHECK8-NEXT: store i32 2, i32* [[I]], align 4 // CHECK8-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK8: .omp.final.done: -// CHECK8-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN10]], i64 2 +// CHECK8-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK8-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN6]], i64 2 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP35]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK8-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK8: arraydestroy.done11: +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP45]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK8-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK8: arraydestroy.done7: // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK8-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK8-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK8-NEXT: entry: // CHECK8-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK8-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK8-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK8-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK8-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK8-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK8-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK8-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 8 // CHECK8-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK8-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK8-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK8-NEXT: [[_TMP8:%.*]] = alloca %struct.S.0*, align 8 +// CHECK8-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK8-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK8-NEXT: [[_TMP4:%.*]] = alloca %struct.S.1*, align 8 // CHECK8-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK8-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK8-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK8-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK8-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK8-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK8-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK8-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK8-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK8-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 +// CHECK8-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK8-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK8-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 8 +// CHECK8-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK8-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK8-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK8-NEXT: [[TMP10:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP9]], align 8 +// CHECK8-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 5 +// CHECK8-NEXT: [[TMP12:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP11]], align 8 +// CHECK8-NEXT: store %struct.S.1* [[TMP12]], %struct.S.1** [[TMP]], align 8 // CHECK8-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK8-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK8-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK8-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK8-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK8-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_LB]], align 4 -// CHECK8-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK8-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK8-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK8-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK8-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK8-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK8-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK8-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK8-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i64 8, i1 false) -// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK8-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 -// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK8-NEXT: [[TMP15:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK8-NEXT: [[TMP16:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP15]], i8* align 4 [[TMP16]], i64 8, i1 false) +// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP17:%.*]] = bitcast [2 x %struct.S.1]* [[TMP10]] to %struct.S.1* +// CHECK8-NEXT: [[TMP18:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 +// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN]], [[TMP18]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK8: omp.arraycpy.body: -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK8-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK8-NEXT: [[TMP9:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK8-NEXT: [[TMP10:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP9]], i8* align 4 [[TMP10]], i64 4, i1 false) -// CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK8: omp.arraycpy.done6: -// CHECK8-NEXT: [[TMP11:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK8-NEXT: [[TMP12:%.*]] = bitcast %struct.S.0* [[VAR7]] to i8* -// CHECK8-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[TMP11]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 4, i1 false) -// CHECK8-NEXT: store %struct.S.0* [[VAR7]], %struct.S.0** [[_TMP8]], align 8 -// CHECK8-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP15]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP17]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK8-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK8-NEXT: [[TMP19:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK8-NEXT: [[TMP20:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i64 4, i1 false) +// CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP18]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK8: omp.arraycpy.done3: +// CHECK8-NEXT: [[TMP21:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 +// CHECK8-NEXT: [[TMP22:%.*]] = bitcast %struct.S.1* [[VAR]] to i8* +// CHECK8-NEXT: [[TMP23:%.*]] = bitcast %struct.S.1* [[TMP21]] to i8* +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false) +// CHECK8-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP4]], align 8 +// CHECK8-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK8-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP26]], 1 // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: br label [[COND_END:%.*]] // CHECK8: cond.false: -// CHECK8-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK8-NEXT: br label [[COND_END]] // CHECK8: cond.end: -// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK8-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK8-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK8-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 -// CHECK8-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] -// CHECK8-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK8-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK8-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK8-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK8-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK8-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 -// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !17 -// CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 -// CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK8-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 -// CHECK8-NEXT: [[TMP24:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 8, !llvm.access.group !17 -// CHECK8-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 -// CHECK8-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP25]] to i64 -// CHECK8-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK8-NEXT: [[TMP26:%.*]] = bitcast %struct.S.0* [[ARRAYIDX11]] to i8* -// CHECK8-NEXT: [[TMP27:%.*]] = bitcast %struct.S.0* [[TMP24]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 4, i1 false), !llvm.access.group !17 +// CHECK8-NEXT: [[TMP32:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !17 +// CHECK8-NEXT: [[TMP33:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 +// CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK8-NEXT: store i32 [[TMP32]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK8-NEXT: [[TMP34:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP4]], align 8, !llvm.access.group !17 +// CHECK8-NEXT: [[TMP35:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 +// CHECK8-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK8-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK8-NEXT: [[TMP36:%.*]] = bitcast %struct.S.1* [[ARRAYIDX7]] to i8* +// CHECK8-NEXT: [[TMP37:%.*]] = bitcast %struct.S.1* [[TMP34]] to i8* +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP36]], i8* align 4 [[TMP37]], i64 4, i1 false), !llvm.access.group !17 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK8-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK8-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK8-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK8-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP38]], 1 +// CHECK8-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: -// CHECK8-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK8-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK8-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK8-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP40]]) +// CHECK8-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK8-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK8-NEXT: br i1 [[TMP42]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK8: .omp.final.then: // CHECK8-NEXT: store i32 2, i32* [[I]], align 4 // CHECK8-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK8: .omp.final.done: -// CHECK8-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN13]], i64 2 +// CHECK8-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK8-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN9]], i64 2 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP33]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK8-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK8: arraydestroy.done14: +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP43]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK8-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK8: arraydestroy.done10: // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK8-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK8-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK8-NEXT: entry: -// CHECK8-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK8-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK8-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK8-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK8-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK8-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK8-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK8-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK8-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK8-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK8-NEXT: entry: -// CHECK8-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK8-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK8-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK8-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK8-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK8-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK8-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK8-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK8-NEXT: store i32 0, i32* [[F]], align 4 // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK8-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK8-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK8-NEXT: entry: -// CHECK8-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK8-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK8-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK8-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK8-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK8-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK8-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK8-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK8-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK8-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK8-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK8-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK8-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK8-NEXT: entry: -// CHECK8-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK8-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK8-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK8-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK8-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK8-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK8-NEXT: ret void // // @@ -1951,6 +2031,7 @@ // CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 // CHECK10-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK10-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK10-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK10-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 @@ -1960,21 +2041,27 @@ // CHECK10-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 // CHECK10-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[T_VAR_ADDR]], [2 x i32]* [[TMP0]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP3]], i32* [[SVAR_ADDR]]) +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: store i32* [[T_VAR_ADDR]], i32** [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP4]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK10-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK10-NEXT: store %struct.S* [[TMP7]], %struct.S** [[TMP6]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK10-NEXT: store i32* [[SVAR_ADDR]], i32** [[TMP8]], align 4 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK10-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK10-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK10-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK10-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 4 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 // CHECK10-NEXT: [[_TMP1:%.*]] = alloca %struct.S*, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1983,143 +2070,154 @@ // CHECK10-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK10-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK10-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK10-NEXT: [[_TMP8:%.*]] = alloca %struct.S*, align 4 -// CHECK10-NEXT: [[SVAR9:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK10-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK10-NEXT: [[_TMP4:%.*]] = alloca %struct.S*, align 4 +// CHECK10-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK10-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK10-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK10-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 4 -// CHECK10-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK10-NEXT: store %struct.S* [[TMP5]], %struct.S** [[_TMP1]], align 4 +// CHECK10-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK10-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK10-NEXT: store %struct.S* [[TMP11]], %struct.S** [[_TMP1]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK10-NEXT: store i32 [[TMP6]], i32* [[T_VAR3]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK10-NEXT: [[TMP8:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP7]], i8* align 4 [[TMP8]], i32 8, i1 false) -// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP9:%.*]] = bitcast [2 x %struct.S]* [[TMP2]] to %struct.S* -// CHECK10-NEXT: [[TMP10:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 -// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP10]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK10-NEXT: store i32 [[TMP12]], i32* [[T_VAR]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK10-NEXT: [[TMP14:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 8, i1 false) +// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP15:%.*]] = bitcast [2 x %struct.S]* [[TMP6]] to %struct.S* +// CHECK10-NEXT: [[TMP16:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 +// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP16]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK10: omp.arraycpy.body: -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP9]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP15]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK10-NEXT: [[TMP11:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK10-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 4, i1 false) +// CHECK10-NEXT: [[TMP17:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK10-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP17]], i8* align 4 [[TMP18]], i32 4, i1 false) // CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP10]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK10: omp.arraycpy.done6: -// CHECK10-NEXT: [[TMP13:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 4 -// CHECK10-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[VAR7]] to i8* -// CHECK10-NEXT: [[TMP15:%.*]] = bitcast %struct.S* [[TMP13]] to i8* -// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i32 4, i1 false) -// CHECK10-NEXT: store %struct.S* [[VAR7]], %struct.S** [[_TMP8]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK10-NEXT: store i32 [[TMP16]], i32* [[SVAR9]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP18]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP16]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK10: omp.arraycpy.done3: +// CHECK10-NEXT: [[TMP19:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK10-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[TMP19]] to i8* +// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false) +// CHECK10-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP4]], align 4 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK10-NEXT: store i32 [[TMP22]], i32* [[SVAR]], align 4 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP24]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP25]], 1 // CHECK10-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP26]], [[COND_FALSE]] ] // CHECK10-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK10-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] -// CHECK10-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] +// CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK10: omp.inner.for.cond.cleanup: // CHECK10-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !6 -// CHECK10-NEXT: store i32 [[TMP26]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 -// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 -// CHECK10-NEXT: [[TMP28:%.*]] = load %struct.S*, %struct.S** [[_TMP8]], align 4, !llvm.access.group !6 -// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[SVAR9]], align 4, !llvm.access.group !6 -// CHECK10-NEXT: store i32 [[TMP29]], i32* [[SVAR_CASTED]], align 4, !llvm.access.group !6 -// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4, !llvm.access.group !6 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP24]], i32 [[TMP25]], [2 x i32]* [[VEC4]], i32 [[TMP27]], [2 x %struct.S]* [[S_ARR5]], %struct.S* [[TMP28]], i32 [[TMP30]]), !llvm.access.group !6 +// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: store i32 [[TMP30]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: store i32 [[TMP31]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP32]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP33]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK10-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP34]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP36:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: store i32 [[TMP36]], i32* [[TMP35]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK10-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP37]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK10-NEXT: [[TMP39:%.*]] = load %struct.S*, %struct.S** [[_TMP4]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: store %struct.S* [[TMP39]], %struct.S** [[TMP38]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK10-NEXT: [[TMP41:%.*]] = load i32, i32* [[SVAR]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: store i32 [[TMP41]], i32* [[TMP40]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !6 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 -// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK10-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) -// CHECK10-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK10-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK10-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP45]]) +// CHECK10-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK10-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 +// CHECK10-NEXT: br i1 [[TMP47]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK10: .omp.final.then: // CHECK10-NEXT: store i32 2, i32* [[I]], align 4 // CHECK10-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK10: .omp.final.done: -// CHECK10-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN11]], i32 2 +// CHECK10-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK10-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i32 2 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP37]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP48]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK10-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK10: arraydestroy.done12: +// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK10: arraydestroy.done7: // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK10-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK10-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK10-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK10-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2127,127 +2225,135 @@ // CHECK10-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK10-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK10-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK10-NEXT: [[_TMP6:%.*]] = alloca %struct.S*, align 4 +// CHECK10-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK10-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK10-NEXT: [[_TMP3:%.*]] = alloca %struct.S*, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK10-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK10-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK10-NEXT: store i32 [[SVAR]], i32* [[SVAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK10-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 4 +// CHECK10-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK10-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP9]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK10-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[TMP11]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK10-NEXT: store i32 [[TMP14]], i32* [[SVAR]], align 4 +// CHECK10-NEXT: store %struct.S* [[TMP12]], %struct.S** [[TMP]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK10-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK10-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i32 8, i1 false) -// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK10-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 -// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK10-NEXT: [[TMP17:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK10-NEXT: [[TMP18:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP17]], i8* align 4 [[TMP18]], i32 8, i1 false) +// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP19:%.*]] = bitcast [2 x %struct.S]* [[TMP10]] to %struct.S* +// CHECK10-NEXT: [[TMP20:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 +// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP20]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK10: omp.arraycpy.body: -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP19]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK10-NEXT: [[TMP9:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK10-NEXT: [[TMP10:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 [[TMP10]], i32 4, i1 false) +// CHECK10-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK10-NEXT: [[TMP22:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 4, i1 false) // CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK10: omp.arraycpy.done4: -// CHECK10-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[VAR5]] to i8* -// CHECK10-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[TMP11]] to i8* -// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 4, i1 false) -// CHECK10-NEXT: store %struct.S* [[VAR5]], %struct.S** [[_TMP6]], align 4 -// CHECK10-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP15]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP20]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK10: omp.arraycpy.done2: +// CHECK10-NEXT: [[TMP23:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK10-NEXT: [[TMP24:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK10-NEXT: [[TMP25:%.*]] = bitcast %struct.S* [[TMP23]] to i8* +// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP24]], i8* align 4 [[TMP25]], i32 4, i1 false) +// CHECK10-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP3]], align 4 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP27]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP28]], 1 // CHECK10-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP29]], [[COND_FALSE]] ] // CHECK10-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP30]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK10-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] -// CHECK10-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK10-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] +// CHECK10-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK10: omp.inner.for.cond.cleanup: // CHECK10-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK10-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !10 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP23]] -// CHECK10-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 -// CHECK10-NEXT: [[TMP24:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4, !llvm.access.group !10 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK10-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 [[TMP25]] -// CHECK10-NEXT: [[TMP26:%.*]] = bitcast %struct.S* [[ARRAYIDX8]] to i8* -// CHECK10-NEXT: [[TMP27:%.*]] = bitcast %struct.S* [[TMP24]] to i8* -// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 4, i1 false), !llvm.access.group !10 +// CHECK10-NEXT: [[TMP34:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !10 +// CHECK10-NEXT: [[TMP35:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP35]] +// CHECK10-NEXT: store i32 [[TMP34]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK10-NEXT: [[TMP36:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 4, !llvm.access.group !10 +// CHECK10-NEXT: [[TMP37:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK10-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP37]] +// CHECK10-NEXT: [[TMP38:%.*]] = bitcast %struct.S* [[ARRAYIDX5]] to i8* +// CHECK10-NEXT: [[TMP39:%.*]] = bitcast %struct.S* [[TMP36]] to i8* +// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i32 4, i1 false), !llvm.access.group !10 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK10-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK10-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK10-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK10-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK10-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK10-NEXT: [[TMP41:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP42:%.*]] = load i32, i32* [[TMP41]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP42]]) +// CHECK10-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK10-NEXT: [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0 +// CHECK10-NEXT: br i1 [[TMP44]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK10: .omp.final.then: // CHECK10-NEXT: store i32 2, i32* [[I]], align 4 // CHECK10-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK10: .omp.final.done: -// CHECK10-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i32 2 +// CHECK10-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK10-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i32 2 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP33]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP45]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK10-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK10: arraydestroy.done11: +// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK10: arraydestroy.done8: // CHECK10-NEXT: ret void // // @@ -2265,34 +2371,34 @@ // CHECK10-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK10-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK10-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK10-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK10-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK10-NEXT: [[VAR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK10-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK10-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x i8*], align 4 // CHECK10-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x i8*], align 4 // CHECK10-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x i8*], align 4 // CHECK10-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK10-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK10-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK10-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i32 8, i1 false) -// CHECK10-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK10-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK10-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i32 1 -// CHECK10-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) -// CHECK10-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR]], align 4 -// CHECK10-NEXT: store %struct.S.0* [[TMP1]], %struct.S.0** [[TMP]], align 4 +// CHECK10-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK10-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i32 1 +// CHECK10-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK10-NEXT: store %struct.S.1* [[TEST]], %struct.S.1** [[VAR]], align 4 +// CHECK10-NEXT: [[TMP1:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR]], align 4 +// CHECK10-NEXT: store %struct.S.1* [[TMP1]], %struct.S.1** [[TMP]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_VAR]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[T_VAR_CASTED]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK10-NEXT: [[TMP4:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 // CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK10-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[TMP8]], align 4 @@ -2310,19 +2416,19 @@ // CHECK10-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 // CHECK10-NEXT: store i8* null, i8** [[TMP16]], align 4 // CHECK10-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK10-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.0]** -// CHECK10-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP18]], align 4 +// CHECK10-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.1]** +// CHECK10-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP18]], align 4 // CHECK10-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK10-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to [2 x %struct.S.0]** -// CHECK10-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP20]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to [2 x %struct.S.1]** +// CHECK10-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP20]], align 4 // CHECK10-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 // CHECK10-NEXT: store i8* null, i8** [[TMP21]], align 4 // CHECK10-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK10-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.0** -// CHECK10-NEXT: store %struct.S.0* [[TMP5]], %struct.S.0** [[TMP23]], align 4 +// CHECK10-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.1** +// CHECK10-NEXT: store %struct.S.1* [[TMP5]], %struct.S.1** [[TMP23]], align 4 // CHECK10-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK10-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to %struct.S.0** -// CHECK10-NEXT: store %struct.S.0* [[TMP6]], %struct.S.0** [[TMP25]], align 4 +// CHECK10-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to %struct.S.1** +// CHECK10-NEXT: store %struct.S.1* [[TMP6]], %struct.S.1** [[TMP25]], align 4 // CHECK10-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 // CHECK10-NEXT: store i8* null, i8** [[TMP26]], align 4 // CHECK10-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 @@ -2332,21 +2438,21 @@ // CHECK10-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 // CHECK10-NEXT: br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK10: omp_offload.failed: -// CHECK10-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l48(i32 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP4]]) #[[ATTR4]] +// CHECK10-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l48(i32 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.1]* [[S_ARR]], %struct.S.1* [[TMP4]]) #[[ATTR4]] // CHECK10-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK10: omp_offload.cont: // CHECK10-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK10-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK10-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK10: arraydestroy.done2: -// CHECK10-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK10-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK10-NEXT: ret i32 [[TMP32]] // @@ -2386,364 +2492,388 @@ // // // CHECK10-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK10-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK10-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK10-NEXT: entry: -// CHECK10-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK10-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK10-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK10-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK10-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK10-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK10-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK10-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK10-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK10-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK10-NEXT: entry: -// CHECK10-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK10-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK10-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK10-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK10-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK10-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK10-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK10-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK10-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l48 -// CHECK10-SAME: (i32 noundef [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK10-SAME: (i32 noundef [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.1]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK10-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK10-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK10-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.1]*, align 4 +// CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK10-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK10-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK10-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK10-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 +// CHECK10-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[S_ARR_ADDR]], align 4 +// CHECK10-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[VAR_ADDR]], align 4 // CHECK10-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK10-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[T_VAR_ADDR]], [2 x i32]* [[TMP0]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP3]]) +// CHECK10-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[S_ARR_ADDR]], align 4 +// CHECK10-NEXT: [[TMP2:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR_ADDR]], align 4 +// CHECK10-NEXT: store %struct.S.1* [[TMP2]], %struct.S.1** [[TMP]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: store i32* [[T_VAR_ADDR]], i32** [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP4]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK10-NEXT: store [2 x %struct.S.1]* [[TMP1]], [2 x %struct.S.1]** [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP7:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK10-NEXT: store %struct.S.1* [[TMP7]], %struct.S.1** [[TMP6]], align 4 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK10-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK10-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK10-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK10-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 -// CHECK10-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK10-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 +// CHECK10-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK10-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK10-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK10-NEXT: [[_TMP8:%.*]] = alloca %struct.S.0*, align 4 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK10-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK10-NEXT: [[_TMP4:%.*]] = alloca %struct.S.1*, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK10-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK10-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK10-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK10-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[_TMP1]], align 4 +// CHECK10-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP7]], align 4 +// CHECK10-NEXT: store %struct.S.1* [[TMP8]], %struct.S.1** [[TMP]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK10-NEXT: store %struct.S.1* [[TMP9]], %struct.S.1** [[_TMP1]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK10-NEXT: store i32 [[TMP5]], i32* [[T_VAR3]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK10-NEXT: [[TMP7:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 [[TMP7]], i32 8, i1 false) -// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP8:%.*]] = bitcast [2 x %struct.S.0]* [[TMP2]] to %struct.S.0* -// CHECK10-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 -// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP9]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK10-NEXT: store i32 [[TMP10]], i32* [[T_VAR]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK10-NEXT: [[TMP12:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 8, i1 false) +// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S.1]* [[TMP6]] to %struct.S.1* +// CHECK10-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 +// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK10: omp.arraycpy.body: -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP8]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK10-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK10-NEXT: [[TMP10:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK10-NEXT: [[TMP11:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP10]], i8* align 4 [[TMP11]], i32 4, i1 false) -// CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK10: omp.arraycpy.done6: -// CHECK10-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP1]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[VAR7]] to i8* -// CHECK10-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 4, i1 false) -// CHECK10-NEXT: store %struct.S.0* [[VAR7]], %struct.S.0** [[_TMP8]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK10-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK10-NEXT: [[TMP15:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK10-NEXT: [[TMP16:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP15]], i8* align 4 [[TMP16]], i32 4, i1 false) +// CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK10: omp.arraycpy.done3: +// CHECK10-NEXT: [[TMP17:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP1]], align 4 +// CHECK10-NEXT: [[TMP18:%.*]] = bitcast %struct.S.1* [[VAR]] to i8* +// CHECK10-NEXT: [[TMP19:%.*]] = bitcast %struct.S.1* [[TMP17]] to i8* +// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false) +// CHECK10-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP4]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP22]], 1 // CHECK10-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK10-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK10-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK10-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK10: omp.inner.for.cond.cleanup: // CHECK10-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !15 -// CHECK10-NEXT: store i32 [[TMP24]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 -// CHECK10-NEXT: [[TMP26:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 4, !llvm.access.group !15 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP22]], i32 [[TMP23]], [2 x i32]* [[VEC4]], i32 [[TMP25]], [2 x %struct.S.0]* [[S_ARR5]], %struct.S.0* [[TMP26]]), !llvm.access.group !15 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: store i32 [[TMP27]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: store i32 [[TMP28]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP29]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP30]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK10-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP31]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP33:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: store i32 [[TMP33]], i32* [[TMP32]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK10-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP34]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK10-NEXT: [[TMP36:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP4]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: store %struct.S.1* [[TMP36]], %struct.S.1** [[TMP35]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !15 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 -// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK10-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK10-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK10-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP40]]) +// CHECK10-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK10-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK10-NEXT: br i1 [[TMP42]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK10: .omp.final.then: // CHECK10-NEXT: store i32 2, i32* [[I]], align 4 // CHECK10-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK10: .omp.final.done: -// CHECK10-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN10]], i32 2 +// CHECK10-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK10-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN6]], i32 2 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP33]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK10-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK10: arraydestroy.done11: +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP43]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK10-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK10: arraydestroy.done7: // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK10-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK10-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK10-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK10-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK10-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK10-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK10-NEXT: [[_TMP6:%.*]] = alloca %struct.S.0*, align 4 +// CHECK10-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK10-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK10-NEXT: [[_TMP3:%.*]] = alloca %struct.S.1*, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK10-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK10-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK10-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 +// CHECK10-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK10-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP9]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 5 +// CHECK10-NEXT: [[TMP12:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP11]], align 4 +// CHECK10-NEXT: store %struct.S.1* [[TMP12]], %struct.S.1** [[TMP]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK10-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK10-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i32 8, i1 false) -// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK10-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 -// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK10-NEXT: [[TMP15:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK10-NEXT: [[TMP16:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP15]], i8* align 4 [[TMP16]], i32 8, i1 false) +// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP17:%.*]] = bitcast [2 x %struct.S.1]* [[TMP10]] to %struct.S.1* +// CHECK10-NEXT: [[TMP18:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 +// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN]], [[TMP18]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK10: omp.arraycpy.body: -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK10-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK10-NEXT: [[TMP9:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK10-NEXT: [[TMP10:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 [[TMP10]], i32 4, i1 false) -// CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK10: omp.arraycpy.done4: -// CHECK10-NEXT: [[TMP11:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = bitcast %struct.S.0* [[VAR5]] to i8* -// CHECK10-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[TMP11]] to i8* -// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 4, i1 false) -// CHECK10-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP6]], align 4 -// CHECK10-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP15]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP17]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK10-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK10-NEXT: [[TMP19:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK10-NEXT: [[TMP20:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i32 4, i1 false) +// CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP18]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK10: omp.arraycpy.done2: +// CHECK10-NEXT: [[TMP21:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK10-NEXT: [[TMP22:%.*]] = bitcast %struct.S.1* [[VAR]] to i8* +// CHECK10-NEXT: [[TMP23:%.*]] = bitcast %struct.S.1* [[TMP21]] to i8* +// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false) +// CHECK10-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP3]], align 4 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP26]], 1 // CHECK10-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK10-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK10-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] -// CHECK10-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK10-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK10: omp.inner.for.cond.cleanup: // CHECK10-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !18 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP23]] -// CHECK10-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 -// CHECK10-NEXT: [[TMP24:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4, !llvm.access.group !18 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK10-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 [[TMP25]] -// CHECK10-NEXT: [[TMP26:%.*]] = bitcast %struct.S.0* [[ARRAYIDX8]] to i8* -// CHECK10-NEXT: [[TMP27:%.*]] = bitcast %struct.S.0* [[TMP24]] to i8* -// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 4, i1 false), !llvm.access.group !18 +// CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP33:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP33]] +// CHECK10-NEXT: store i32 [[TMP32]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP34:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP3]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP35:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 [[TMP35]] +// CHECK10-NEXT: [[TMP36:%.*]] = bitcast %struct.S.1* [[ARRAYIDX5]] to i8* +// CHECK10-NEXT: [[TMP37:%.*]] = bitcast %struct.S.1* [[TMP34]] to i8* +// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 [[TMP37]], i32 4, i1 false), !llvm.access.group !18 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK10-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP38]], 1 +// CHECK10-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK10-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK10-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP40]]) +// CHECK10-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK10-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK10-NEXT: br i1 [[TMP42]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK10: .omp.final.then: // CHECK10-NEXT: store i32 2, i32* [[I]], align 4 // CHECK10-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK10: .omp.final.done: -// CHECK10-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN10]], i32 2 +// CHECK10-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK10-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN7]], i32 2 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP33]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK10-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK10: arraydestroy.done11: +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP43]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK10-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK10: arraydestroy.done8: // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK10-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK10-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK10-NEXT: entry: -// CHECK10-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK10-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK10-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK10-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK10-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK10-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK10-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK10-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK10-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK10-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK10-NEXT: entry: -// CHECK10-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK10-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK10-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK10-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK10-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK10-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK10-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK10-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK10-NEXT: store i32 0, i32* [[F]], align 4 // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK10-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK10-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK10-NEXT: entry: -// CHECK10-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK10-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK10-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK10-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK10-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK10-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK10-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK10-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK10-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK10-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK10-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK10-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK10-NEXT: entry: -// CHECK10-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK10-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK10-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK10-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK10-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK10-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK10-NEXT: ret void // // diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp @@ -144,70 +144,83 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l43 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !11 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -216,12 +229,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -231,60 +243,64 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -295,75 +311,88 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l48 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !20 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !20 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !20 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !20 -// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !20 -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 +// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP14]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !20 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !20 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -372,12 +401,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -387,43 +415,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 // CHECK1-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !23 @@ -431,17 +463,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -507,70 +539,83 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !26 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !26 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !26 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -579,12 +624,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -594,43 +638,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !29 // CHECK1-NEXT: call void @_Z3fn4v(), !llvm.access.group !29 @@ -638,17 +686,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -659,75 +707,88 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !32 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !32 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !32 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !32 -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !32 +// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP14]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !32 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !32 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -736,12 +797,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -751,43 +811,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !35 // CHECK1-NEXT: call void @_Z3fn5v(), !llvm.access.group !35 @@ -795,17 +859,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -817,89 +881,103 @@ // CHECK1-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARG_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !38 -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP2]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !38 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !38 // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !38 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !38 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !38 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !38 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !38 -// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !38 -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !38 +// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !38 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !38 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -908,12 +986,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -923,43 +1000,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !41 // CHECK1-NEXT: call void @_Z3fn6v(), !llvm.access.group !41 @@ -967,17 +1048,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1041,70 +1122,83 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l59 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !44 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !44 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !44 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !44 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !44 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1113,12 +1207,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1128,43 +1221,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !47 // CHECK1-NEXT: call void @_Z3fn1v(), !llvm.access.group !47 @@ -1172,17 +1269,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1193,75 +1290,88 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l65 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..12 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.11*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !50 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !50 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !50 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !50 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !50 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !50 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !50 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !50 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !50 -// CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !50 -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !50 +// CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TMP14]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !50 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !50 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1270,12 +1380,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1285,43 +1394,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !53 // CHECK1-NEXT: call void @_Z3fn2v(), !llvm.access.group !53 @@ -1329,17 +1442,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1351,89 +1464,103 @@ // CHECK1-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK1-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.13*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARG_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !56 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !56 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !56 -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !56 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !56 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !56 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !56 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP2]], align 4, !llvm.access.group !56 +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !56 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !56 // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !56 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !56 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !56 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !56 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !56 -// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !56 -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !56 +// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !56 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !56 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !56 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !56 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1442,12 +1569,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1457,43 +1583,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !59 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !59 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !59 // CHECK1-NEXT: call void @_Z3fn3v(), !llvm.access.group !59 @@ -1501,17 +1631,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1553,70 +1683,83 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l43 // CHECK3-SAME: () #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !11 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !11 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1625,12 +1768,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1640,60 +1782,64 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1704,75 +1850,88 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l48 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !20 +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !20 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !20 // CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !20 -// CHECK3-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !20 -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 +// CHECK3-NEXT: call void @.omp_outlined..3(i32* [[TMP14]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !20 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !20 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1781,12 +1940,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1796,43 +1954,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 // CHECK3-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !23 @@ -1840,17 +2002,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1916,70 +2078,83 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !26 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !26 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !26 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1988,12 +2163,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2003,43 +2177,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !29 // CHECK3-NEXT: call void @_Z3fn4v(), !llvm.access.group !29 @@ -2047,17 +2225,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2068,75 +2246,88 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @.omp_outlined..7(i32* [[TMP14]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2145,12 +2336,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2160,43 +2350,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK3-NEXT: call void @_Z3fn5v() @@ -2204,17 +2398,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2226,18 +2420,21 @@ // CHECK3-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK3-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[ARG_ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -2245,131 +2442,145 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED12:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR18:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_11:%.*]] = alloca [[STRUCT_ANON_8]], align 8 +// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARG_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 +// CHECK3-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP7]] to i1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE7:%.*]] // CHECK3: omp_if.then: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 -// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK3-NEXT: [[TMP14:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !35 -// CHECK3-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP14]] to i1 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK3-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK3-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP16]], align 8, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP17]], align 8, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP19:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !35 +// CHECK3-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP19]] to i1 // CHECK3-NEXT: [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL3]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL4]], i8* [[CONV]], align 1, !llvm.access.group !35 -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !35 -// CHECK3-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !35 -// CHECK3-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK3-NEXT: store i8 [[FROMBOOL4]], i8* [[TMP18]], align 8, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP20:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !35 +// CHECK3-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP20]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL5]], label [[OMP_IF_THEN6:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then6: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]), !llvm.access.group !35 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !35 // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group !35 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !35 +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP5]]), !llvm.access.group !35 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !35 // CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !35 -// CHECK3-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]], !llvm.access.group !35 -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group !35 +// CHECK3-NEXT: call void @.omp_outlined..9(i32* [[TMP21]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !35 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP5]]), !llvm.access.group !35 // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_IF_END23:%.*]] +// CHECK3-NEXT: br label [[OMP_IF_END22:%.*]] // CHECK3: omp_if.else7: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] // CHECK3: omp.inner.for.cond8: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK3-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END22:%.*]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END21:%.*]] // CHECK3: omp.inner.for.body10: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK3-NEXT: [[TMP26:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[TOBOOL11:%.*]] = trunc i8 [[TMP26]] to i1 -// CHECK3-NEXT: [[CONV13:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED12]] to i8* -// CHECK3-NEXT: [[FROMBOOL14:%.*]] = zext i1 [[TOBOOL11]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL14]], i8* [[CONV13]], align 1 -// CHECK3-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED12]], align 8 -// CHECK3-NEXT: [[TMP28:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[TOBOOL15:%.*]] = trunc i8 [[TMP28]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL15]], label [[OMP_IF_THEN16:%.*]], label [[OMP_IF_ELSE17:%.*]] -// CHECK3: omp_if.then16: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i64 [[TMP23]], i64 [[TMP25]], i64 [[TMP27]]) -// CHECK3-NEXT: br label [[OMP_IF_END19:%.*]] -// CHECK3: omp_if.else17: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR18]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..10(i32* [[TMP29]], i32* [[DOTBOUND_ZERO_ADDR18]], i64 [[TMP23]], i64 [[TMP25]], i64 [[TMP27]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) -// CHECK3-NEXT: br label [[OMP_IF_END19]] -// CHECK3: omp_if.end19: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC20:%.*]] -// CHECK3: omp.inner.for.inc20: -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK3-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK3-NEXT: store i64 [[TMP27]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = zext i32 [[TMP28]] to i64 +// CHECK3-NEXT: store i64 [[TMP29]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 0 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP30]], align 8 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 1 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP31]], align 8 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP33:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL12:%.*]] = trunc i8 [[TMP33]] to i1 +// CHECK3-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL12]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL13]], i8* [[TMP32]], align 8 +// CHECK3-NEXT: [[TMP34:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP34]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]] +// CHECK3: omp_if.then15: +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_11]]) +// CHECK3-NEXT: br label [[OMP_IF_END18:%.*]] +// CHECK3: omp_if.else16: +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP5]]) +// CHECK3-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 +// CHECK3-NEXT: call void @.omp_outlined..10(i32* [[TMP35]], i32* [[DOTBOUND_ZERO_ADDR17]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_11]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP5]]) +// CHECK3-NEXT: br label [[OMP_IF_END18]] +// CHECK3: omp_if.end18: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC19:%.*]] +// CHECK3: omp.inner.for.inc19: +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK3-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP38:![0-9]+]] -// CHECK3: omp.inner.for.end22: -// CHECK3-NEXT: br label [[OMP_IF_END23]] -// CHECK3: omp_if.end23: +// CHECK3: omp.inner.for.end21: +// CHECK3-NEXT: br label [[OMP_IF_END22]] +// CHECK3: omp_if.end22: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK3-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK3-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2378,13 +2589,12 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2394,49 +2604,56 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK3-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 8 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK3-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK3-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK3-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 // CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group !39 @@ -2444,38 +2661,38 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP14]], 99 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK3: cond.true6: // CHECK3-NEXT: br label [[COND_END8:%.*]] // CHECK3: cond.false7: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END8]] // CHECK3: cond.end8: -// CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP15]], [[COND_FALSE7]] ] +// CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] // CHECK3-NEXT: store i32 [[COND9]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] // CHECK3: omp.inner.for.cond10: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK3-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] // CHECK3: omp.inner.for.body12: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] // CHECK3-NEXT: store i32 [[ADD14]], i32* [[I]], align 4 // CHECK3-NEXT: call void @_Z3fn6v() @@ -2483,8 +2700,8 @@ // CHECK3: omp.body.continue15: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] // CHECK3: omp.inner.for.inc16: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK3-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK3: omp.inner.for.end18: @@ -2492,12 +2709,12 @@ // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP29]]) +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK3-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2506,13 +2723,12 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2522,49 +2738,56 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK3-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 8 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK3-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK3-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK3-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 // CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 @@ -2572,38 +2795,38 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP14]], 99 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK3: cond.true6: // CHECK3-NEXT: br label [[COND_END8:%.*]] // CHECK3: cond.false7: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END8]] // CHECK3: cond.end8: -// CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP15]], [[COND_FALSE7]] ] +// CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] // CHECK3-NEXT: store i32 [[COND9]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] // CHECK3: omp.inner.for.cond10: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK3-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] // CHECK3: omp.inner.for.body12: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] // CHECK3-NEXT: store i32 [[ADD14]], i32* [[I]], align 4 // CHECK3-NEXT: call void @_Z3fn6v() @@ -2611,8 +2834,8 @@ // CHECK3: omp.body.continue15: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] // CHECK3: omp.inner.for.inc16: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK3-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK3: omp.inner.for.end18: @@ -2620,12 +2843,12 @@ // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP29]]) +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK3-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2689,70 +2912,83 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l59 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47 -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !47 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !47 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !47 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !47 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2761,12 +2997,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2776,43 +3011,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !50 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !50 // CHECK3-NEXT: call void @_Z3fn1v(), !llvm.access.group !50 @@ -2820,17 +3059,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2841,75 +3080,88 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l65 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..13 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.11*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..14(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @.omp_outlined..14(i32* [[TMP14]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2918,12 +3170,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2933,43 +3184,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK3-NEXT: call void @_Z3fn2v() @@ -2977,17 +3232,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2999,89 +3254,103 @@ // CHECK3-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK3-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.13*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[ARG_ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARG_ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !55 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !55 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !55 -// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !55 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !55 // CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !55 -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK3-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !55 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !55 +// CHECK3-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK3-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !55 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !55 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !55 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP2]], align 4, !llvm.access.group !55 +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !55 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !55 // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !55 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !55 +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !55 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !55 // CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !55 -// CHECK3-NEXT: call void @.omp_outlined..16(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !55 -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !55 +// CHECK3-NEXT: call void @.omp_outlined..16(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !55 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !55 // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !55 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !55 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3090,12 +3359,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3105,43 +3373,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !58 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !58 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !58 // CHECK3-NEXT: call void @_Z3fn3v(), !llvm.access.group !58 @@ -3149,17 +3421,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3777,70 +4049,83 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l43 // CHECK9-SAME: () #[[ATTR1:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !15 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !15 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3849,12 +4134,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3864,60 +4148,64 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3928,75 +4216,88 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l48 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !24 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !24 // CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !24 -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 +// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP14]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !24 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !24 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK9-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4005,12 +4306,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4020,43 +4320,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 // CHECK9-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !27 @@ -4064,17 +4368,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4140,70 +4444,83 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !30 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !30 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4212,12 +4529,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4227,43 +4543,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 // CHECK9-NEXT: call void @_Z3fn4v(), !llvm.access.group !33 @@ -4271,17 +4591,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4292,75 +4612,88 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !36 +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !36 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !36 // CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !36 -// CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !36 -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 +// CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP14]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !36 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !36 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK9-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4369,12 +4702,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4384,43 +4716,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 // CHECK9-NEXT: call void @_Z3fn5v(), !llvm.access.group !39 @@ -4428,17 +4764,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4450,89 +4786,103 @@ // CHECK9-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK9-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[ARG_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARG_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP2]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !42 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !42 // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !42 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !42 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !42 // CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !42 -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !42 +// CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !42 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !42 // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK9-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4541,12 +4891,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4556,43 +4905,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 // CHECK9-NEXT: call void @_Z3fn6v(), !llvm.access.group !45 @@ -4600,17 +4953,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4674,70 +5027,83 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l59 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !48 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !48 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !48 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !48 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !48 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !48 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !48 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !48 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !48 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !48 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4746,12 +5112,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4761,43 +5126,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !51 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !51 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !51 // CHECK9-NEXT: call void @_Z3fn1v(), !llvm.access.group !51 @@ -4805,17 +5174,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4826,75 +5195,88 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l65 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..12 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.11*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !54 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !54 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !54 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !54 +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !54 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 // CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !54 -// CHECK9-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 +// CHECK9-NEXT: call void @.omp_outlined..13(i32* [[TMP14]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !54 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !54 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK9-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4903,12 +5285,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4918,43 +5299,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !57 // CHECK9-NEXT: call void @_Z3fn2v(), !llvm.access.group !57 @@ -4962,17 +5347,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4984,89 +5369,103 @@ // CHECK9-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK9-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.13*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[ARG_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARG_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !60 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !60 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !60 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !60 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !60 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !60 -// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !60 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !60 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !60 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !60 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !60 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP2]], align 4, !llvm.access.group !60 +// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !60 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !60 // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !60 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !60 +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !60 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !60 // CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !60 -// CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !60 -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !60 +// CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !60 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !60 // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !60 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !60 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP61:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK9-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5075,12 +5474,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5090,43 +5488,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !63 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !63 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !63 // CHECK9-NEXT: call void @_Z3fn3v(), !llvm.access.group !63 @@ -5134,17 +5536,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP64:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5186,70 +5588,83 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l43 // CHECK11-SAME: () #[[ATTR1:[0-9]+]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !15 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !15 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5258,12 +5673,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5273,60 +5687,64 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5337,75 +5755,88 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l48 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !24 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !24 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !24 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !24 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !24 +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !24 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !24 // CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !24 -// CHECK11-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !24 -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 +// CHECK11-NEXT: call void @.omp_outlined..3(i32* [[TMP14]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !24 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !24 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK11-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5414,12 +5845,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5429,43 +5859,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 // CHECK11-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !27 @@ -5473,17 +5907,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5549,70 +5983,83 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !30 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !30 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5621,12 +6068,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5636,43 +6082,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 // CHECK11-NEXT: call void @_Z3fn4v(), !llvm.access.group !33 @@ -5680,17 +6130,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5701,75 +6151,88 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @.omp_outlined..7(i32* [[TMP14]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK11-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5778,12 +6241,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5793,43 +6255,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK11-NEXT: call void @_Z3fn5v() @@ -5837,17 +6303,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5859,18 +6325,21 @@ // CHECK11-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK11-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[ARG_ADDR:%.*]] = alloca i32*, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -5878,131 +6347,145 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED12:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR18:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_11:%.*]] = alloca [[STRUCT_ANON_8]], align 8 +// CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARG_ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 +// CHECK11-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK11-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP7]] to i1 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE7:%.*]] // CHECK11: omp_if.then: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !39 -// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !39 // CHECK11-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK11-NEXT: [[TMP14:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !39 -// CHECK11-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP14]] to i1 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK11-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK11-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP16]], align 8, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP17]], align 8, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP19:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !39 +// CHECK11-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP19]] to i1 // CHECK11-NEXT: [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL3]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL4]], i8* [[CONV]], align 1, !llvm.access.group !39 -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !39 -// CHECK11-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !39 -// CHECK11-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK11-NEXT: store i8 [[FROMBOOL4]], i8* [[TMP18]], align 8, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP20:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !39 +// CHECK11-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP20]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL5]], label [[OMP_IF_THEN6:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then6: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]), !llvm.access.group !39 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !39 // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group !39 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !39 +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP5]]), !llvm.access.group !39 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !39 // CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !39 -// CHECK11-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]], !llvm.access.group !39 -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group !39 +// CHECK11-NEXT: call void @.omp_outlined..9(i32* [[TMP21]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !39 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP5]]), !llvm.access.group !39 // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !39 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK11: omp.inner.for.end: -// CHECK11-NEXT: br label [[OMP_IF_END23:%.*]] +// CHECK11-NEXT: br label [[OMP_IF_END22:%.*]] // CHECK11: omp_if.else7: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] // CHECK11: omp.inner.for.cond8: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK11-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END22:%.*]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK11-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END21:%.*]] // CHECK11: omp.inner.for.body10: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK11-NEXT: [[TMP26:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK11-NEXT: [[TOBOOL11:%.*]] = trunc i8 [[TMP26]] to i1 -// CHECK11-NEXT: [[CONV13:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED12]] to i8* -// CHECK11-NEXT: [[FROMBOOL14:%.*]] = zext i1 [[TOBOOL11]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL14]], i8* [[CONV13]], align 1 -// CHECK11-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED12]], align 8 -// CHECK11-NEXT: [[TMP28:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK11-NEXT: [[TOBOOL15:%.*]] = trunc i8 [[TMP28]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL15]], label [[OMP_IF_THEN16:%.*]], label [[OMP_IF_ELSE17:%.*]] -// CHECK11: omp_if.then16: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i64 [[TMP23]], i64 [[TMP25]], i64 [[TMP27]]) -// CHECK11-NEXT: br label [[OMP_IF_END19:%.*]] -// CHECK11: omp_if.else17: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR18]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..10(i32* [[TMP29]], i32* [[DOTBOUND_ZERO_ADDR18]], i64 [[TMP23]], i64 [[TMP25]], i64 [[TMP27]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) -// CHECK11-NEXT: br label [[OMP_IF_END19]] -// CHECK11: omp_if.end19: -// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC20:%.*]] -// CHECK11: omp.inner.for.inc20: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK11-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK11-NEXT: store i64 [[TMP27]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = zext i32 [[TMP28]] to i64 +// CHECK11-NEXT: store i64 [[TMP29]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 0 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP30]], align 8 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 1 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP31]], align 8 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP33:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL12:%.*]] = trunc i8 [[TMP33]] to i1 +// CHECK11-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL12]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL13]], i8* [[TMP32]], align 8 +// CHECK11-NEXT: [[TMP34:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP34]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]] +// CHECK11: omp_if.then15: +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_11]]) +// CHECK11-NEXT: br label [[OMP_IF_END18:%.*]] +// CHECK11: omp_if.else16: +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP5]]) +// CHECK11-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 +// CHECK11-NEXT: call void @.omp_outlined..10(i32* [[TMP35]], i32* [[DOTBOUND_ZERO_ADDR17]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_11]]) #[[ATTR2]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP5]]) +// CHECK11-NEXT: br label [[OMP_IF_END18]] +// CHECK11: omp_if.end18: +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC19:%.*]] +// CHECK11: omp.inner.for.inc19: +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK11-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP42:![0-9]+]] -// CHECK11: omp.inner.for.end22: -// CHECK11-NEXT: br label [[OMP_IF_END23]] -// CHECK11: omp_if.end23: +// CHECK11: omp.inner.for.end21: +// CHECK11-NEXT: br label [[OMP_IF_END22]] +// CHECK11: omp_if.end22: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK11-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK11-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6011,13 +6494,12 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6027,49 +6509,56 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK11-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 8 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK11-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK11-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK11-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 // CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 @@ -6077,38 +6566,38 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP14]], 99 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK11: cond.true6: // CHECK11-NEXT: br label [[COND_END8:%.*]] // CHECK11: cond.false7: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END8]] // CHECK11: cond.end8: -// CHECK11-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP15]], [[COND_FALSE7]] ] +// CHECK11-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] // CHECK11-NEXT: store i32 [[COND9]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] // CHECK11: omp.inner.for.cond10: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] // CHECK11: omp.inner.for.body12: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK11-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] // CHECK11-NEXT: store i32 [[ADD14]], i32* [[I]], align 4 // CHECK11-NEXT: call void @_Z3fn6v() @@ -6116,8 +6605,8 @@ // CHECK11: omp.body.continue15: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] // CHECK11: omp.inner.for.inc16: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK11-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK11: omp.inner.for.end18: @@ -6125,12 +6614,12 @@ // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP29]]) +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6139,13 +6628,12 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6155,49 +6643,56 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK11-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 8 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK11-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK11-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK11-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !47 // CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group !47 @@ -6205,38 +6700,38 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP14]], 99 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK11: cond.true6: // CHECK11-NEXT: br label [[COND_END8:%.*]] // CHECK11: cond.false7: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END8]] // CHECK11: cond.end8: -// CHECK11-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP15]], [[COND_FALSE7]] ] +// CHECK11-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] // CHECK11-NEXT: store i32 [[COND9]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] // CHECK11: omp.inner.for.cond10: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] // CHECK11: omp.inner.for.body12: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK11-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] // CHECK11-NEXT: store i32 [[ADD14]], i32* [[I]], align 4 // CHECK11-NEXT: call void @_Z3fn6v() @@ -6244,8 +6739,8 @@ // CHECK11: omp.body.continue15: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] // CHECK11: omp.inner.for.inc16: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK11-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK11: omp.inner.for.end18: @@ -6253,12 +6748,12 @@ // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP29]]) +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6322,70 +6817,83 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l59 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !51 -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !51 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !51 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !51 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !51 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6394,12 +6902,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6409,43 +6916,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !54 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !54 // CHECK11-NEXT: call void @_Z3fn1v(), !llvm.access.group !54 @@ -6453,17 +6964,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6474,75 +6985,88 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l65 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..13 to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.11*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..14(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @.omp_outlined..14(i32* [[TMP14]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK11-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6551,12 +7075,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6566,43 +7089,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK11-NEXT: call void @_Z3fn2v() @@ -6610,17 +7137,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6632,89 +7159,103 @@ // CHECK11-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK11-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.13*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[ARG_ADDR:%.*]] = alloca i32*, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARG_ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !59 -// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !59 // CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !59 -// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK11-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !59 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 +// CHECK11-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK11-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !59 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !59 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !59 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP2]], align 4, !llvm.access.group !59 +// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !59 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !59 // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !59 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !59 +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !59 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !59 // CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !59 -// CHECK11-NEXT: call void @.omp_outlined..16(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !59 -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !59 +// CHECK11-NEXT: call void @.omp_outlined..16(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !59 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !59 // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !59 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !59 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK11-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6723,12 +7264,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6738,43 +7278,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !62 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !62 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !62 // CHECK11-NEXT: call void @_Z3fn3v(), !llvm.access.group !62 @@ -6782,17 +7326,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp @@ -233,6 +233,7 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], i64* [[SVAR_ADDR]], align 8 @@ -242,20 +243,25 @@ // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SFVAR_ADDR]] to float* // CHECK1-NEXT: store double* [[CONV1]], double** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, double*, double*, i32*, float*)* @.omp_outlined. to void (i32*, i32*, ...)*), double* [[CONV]], double* [[TMP0]], i32* [[CONV2]], float* [[CONV3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store double* [[CONV]], double** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[TMP2]], double** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[CONV2]], i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[CONV3]], float** [[TMP4]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[G:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -264,107 +270,121 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store double* [[G]], double** [[G_ADDR]], align 8 -// CHECK1-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store float* [[SFVAR]], float** [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float*, float** [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store double* [[TMP1]], double** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: store double* [[TMP4]], double** [[_TMP1]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 8 +// CHECK1-NEXT: store double* [[TMP4]], double** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[TMP9]], double** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load double*, double** [[_TMP1]], align 8 -// CHECK1-NEXT: store double* [[G14]], double** [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load double*, double** [[_TMP1]], align 8 +// CHECK1-NEXT: store double* [[G1]], double** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = load double*, double** [[_TMP5]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, double*, double*, i32*, float*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP14]], i64 [[TMP16]], double* [[G3]], double* [[TMP17]], i32* [[SVAR6]], float* [[SFVAR7]]), !llvm.access.group !4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i64 [[TMP19]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP22]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP23]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store double* [[G]], double** [[TMP24]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP26:%.*]] = load double*, double** [[_TMP3]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: store double* [[TMP26]], double** [[TMP25]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store i32* [[SVAR]], i32** [[TMP27]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store float* [[SFVAR]], float** [[TMP28]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK1-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP24:%.*]] = load double, double* [[G3]], align 8 -// CHECK1-NEXT: store volatile double [[TMP24]], double* [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load double*, double** [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load double, double* [[TMP25]], align 8 -// CHECK1-NEXT: store volatile double [[TMP26]], double* [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[SVAR6]], align 4 -// CHECK1-NEXT: store i32 [[TMP27]], i32* [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load float, float* [[SFVAR7]], align 4 -// CHECK1-NEXT: store float [[TMP28]], float* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load double, double* [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP35]], double* [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load double*, double** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load double, double* [[TMP36]], align 8 +// CHECK1-NEXT: store volatile double [[TMP37]], double* [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP38]], i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load float, float* [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP39]], float* [[TMP8]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[G:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -372,112 +392,116 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store double* [[G]], double** [[G_ADDR]], align 8 -// CHECK1-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store float* [[SFVAR]], float** [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float*, float** [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store double* [[TMP1]], double** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double*, double** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP11]], align 8 +// CHECK1-NEXT: store double* [[TMP8]], double** [[TMP]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: store double* [[G14]], double** [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[G1]], double** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: store double 1.000000e+00, double* [[G3]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP15:%.*]] = load double*, double** [[_TMP5]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP15]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: store i32 3, i32* [[SVAR6]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: store float 4.000000e+00, float* [[SFVAR7]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double* [[G3]], double** [[TMP16]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP18:%.*]] = load double*, double** [[_TMP5]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: store double* [[TMP18]], double** [[TMP17]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store i32* [[SVAR6]], i32** [[TMP19]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store float* [[SFVAR7]], float** [[TMP20]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !8 +// CHECK1-NEXT: store double 1.000000e+00, double* [[G]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP24:%.*]] = load double*, double** [[_TMP3]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP24]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: store i32 3, i32* [[SVAR]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: store float 4.000000e+00, float* [[SFVAR]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store double* [[G]], double** [[TMP25]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load double*, double** [[_TMP3]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: store double* [[TMP27]], double** [[TMP26]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[SVAR]], i32** [[TMP28]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[SFVAR]], float** [[TMP29]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK1-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK1-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP26:%.*]] = load double, double* [[G3]], align 8 -// CHECK1-NEXT: store volatile double [[TMP26]], double* [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load double*, double** [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load double, double* [[TMP27]], align 8 -// CHECK1-NEXT: store volatile double [[TMP28]], double* [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[SVAR6]], align 4 -// CHECK1-NEXT: store i32 [[TMP29]], i32* [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load float, float* [[SFVAR7]], align 4 -// CHECK1-NEXT: store float [[TMP30]], float* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load double, double* [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP35]], double* [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load double*, double** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load double, double* [[TMP36]], align 8 +// CHECK1-NEXT: store volatile double [[TMP37]], double* [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP38]], i32* [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load float, float* [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP39]], float* [[TMP12]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -519,6 +543,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 // CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], i32* [[SVAR_ADDR]], align 4 @@ -533,20 +558,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load volatile double, double* [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], double* [[G13]], align 8 // CHECK3-NEXT: store double* [[G13]], double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, double*, double*, i32*, float*)* @.omp_outlined. to void (i32*, i32*, ...)*), double* [[G2]], double* [[TMP5]], i32* [[SVAR_ADDR]], float* [[CONV]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G2]], double** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load double*, double** [[_TMP4]], align 4 +// CHECK3-NEXT: store double* [[TMP7]], double** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SVAR_ADDR]], i32** [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[CONV]], float** [[TMP9]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca float*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -555,105 +585,119 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP5:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP3:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 -// CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store float* [[SFVAR]], float** [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load float*, float** [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store double* [[TMP1]], double** [[TMP]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP]], align 4 -// CHECK3-NEXT: store double* [[TMP4]], double** [[_TMP1]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 4 +// CHECK3-NEXT: store double* [[TMP4]], double** [[TMP]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load double*, double** [[TMP]], align 4 +// CHECK3-NEXT: store double* [[TMP9]], double** [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load double*, double** [[_TMP1]], align 4 -// CHECK3-NEXT: store double* [[G14]], double** [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load double*, double** [[_TMP1]], align 4 +// CHECK3-NEXT: store double* [[G1]], double** [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP15:%.*]] = load double*, double** [[_TMP5]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, double*, double*, i32*, float*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP13]], i32 [[TMP14]], double* [[G3]], double* [[TMP15]], i32* [[SVAR6]], float* [[SFVAR7]]), !llvm.access.group !5 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP20]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP21]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store double* [[G]], double** [[TMP22]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP24:%.*]] = load double*, double** [[_TMP3]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store double* [[TMP24]], double** [[TMP23]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store i32* [[SVAR]], i32** [[TMP25]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store float* [[SFVAR]], float** [[TMP26]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !5 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK3-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK3-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP22:%.*]] = load double, double* [[G3]], align 8 -// CHECK3-NEXT: store volatile double [[TMP22]], double* [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP23:%.*]] = load double*, double** [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load double, double* [[TMP23]], align 4 -// CHECK3-NEXT: store volatile double [[TMP24]], double* [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[SVAR6]], align 4 -// CHECK3-NEXT: store i32 [[TMP25]], i32* [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load float, float* [[SFVAR7]], align 4 -// CHECK3-NEXT: store float [[TMP26]], float* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load double, double* [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP33]], double* [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP34:%.*]] = load double*, double** [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load double, double* [[TMP34]], align 4 +// CHECK3-NEXT: store volatile double [[TMP35]], double* [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP36]], i32* [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load float, float* [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP37]], float* [[TMP8]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca float*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -661,110 +705,114 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 -// CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store float* [[SFVAR]], float** [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load float*, float** [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store double* [[TMP1]], double** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load double*, double** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double*, double** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP11]], align 4 +// CHECK3-NEXT: store double* [[TMP8]], double** [[TMP]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load double*, double** [[TMP]], align 4 -// CHECK3-NEXT: store double* [[G13]], double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load double*, double** [[TMP]], align 4 +// CHECK3-NEXT: store double* [[G1]], double** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: store double 1.000000e+00, double* [[G2]], align 8, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP15:%.*]] = load double*, double** [[_TMP4]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP15]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: store i32 3, i32* [[SVAR5]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: store float 4.000000e+00, float* [[SFVAR6]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double* [[G2]], double** [[TMP16]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP18:%.*]] = load double*, double** [[_TMP4]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: store double* [[TMP18]], double** [[TMP17]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store i32* [[SVAR5]], i32** [[TMP19]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store float* [[SFVAR6]], float** [[TMP20]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !9 +// CHECK3-NEXT: store double 1.000000e+00, double* [[G]], align 8, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP24:%.*]] = load double*, double** [[_TMP2]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP24]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store i32 3, i32* [[SVAR]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store float 4.000000e+00, float* [[SFVAR]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G]], double** [[TMP25]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load double*, double** [[_TMP2]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store double* [[TMP27]], double** [[TMP26]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SVAR]], i32** [[TMP28]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[SFVAR]], float** [[TMP29]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK3-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK3-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK3-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP26:%.*]] = load double, double* [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP26]], double* [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP27:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load double, double* [[TMP27]], align 4 -// CHECK3-NEXT: store volatile double [[TMP28]], double* [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP29]], i32* [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load float, float* [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP30]], float* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load double, double* [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP35]], double* [[TMP6]], align 8 +// CHECK3-NEXT: [[TMP36:%.*]] = load double*, double** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load double, double* [[TMP36]], align 4 +// CHECK3-NEXT: store volatile double [[TMP37]], double* [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP38]], i32* [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = load float, float* [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP39]], float* [[TMP12]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -955,6 +1003,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 @@ -966,21 +1015,27 @@ // CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK9-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]], [2 x i32]* [[TMP0]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP3]], i32* [[CONV1]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S* [[TMP7]], %struct.S** [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP8]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -989,33 +1044,38 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca %struct.S*, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: store %struct.S* [[TMP5]], %struct.S** [[_TMP1]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK9-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S* [[TMP11]], %struct.S** [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -1025,119 +1085,129 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP6:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store %struct.S* [[VAR6]], %struct.S** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i32*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], [2 x i32]* [[VEC4]], i32* [[T_VAR3]], [2 x %struct.S]* [[S_ARR5]], %struct.S* [[TMP18]], i32* [[SVAR8]]), !llvm.access.group !5 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP24]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP25]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP26]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[TMP27]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP28]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP30:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: store %struct.S* [[TMP30]], %struct.S** [[TMP29]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: store i32* [[SVAR]], i32** [[TMP31]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !5 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK9-NEXT: br i1 [[TMP39]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP27]], i32* [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK9-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP30:%.*]] = bitcast [2 x %struct.S]* [[S_ARR5]] to %struct.S* -// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN10]], [[TMP31]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP40]], i32* [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK9-NEXT: [[TMP42:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP41]], i8* align 4 [[TMP42]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP43:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK9-NEXT: [[TMP44:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN5]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN5]], [[TMP44]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP30]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK9-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP43]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[TMP45:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK9-NEXT: [[TMP46:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP45]], i8* align 4 [[TMP46]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP31]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done11: -// CHECK9-NEXT: [[TMP34:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[TMP6]] to i8* -// CHECK9-NEXT: [[TMP36:%.*]] = bitcast %struct.S* [[TMP34]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK9-NEXT: store i32 [[TMP37]], i32* [[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP44]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done6: +// CHECK9-NEXT: [[TMP47:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP48:%.*]] = bitcast %struct.S* [[TMP12]] to i8* +// CHECK9-NEXT: [[TMP49:%.*]] = bitcast %struct.S* [[TMP47]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP48]], i8* align 4 [[TMP49]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP50:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP50]], i32* [[TMP10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN12]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP38]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP51]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done13: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done8: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1145,39 +1215,43 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca %struct.S*, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32*, i32** [[TMP13]], align 8 +// CHECK9-NEXT: store %struct.S* [[TMP12]], %struct.S** [[TMP]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -1187,115 +1261,115 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store %struct.S* [[VAR6]], %struct.S** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP18:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK9-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX11]] to i8* -// CHECK9-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[TMP18]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false), !llvm.access.group !9 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP26]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP28:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: [[TMP30:%.*]] = bitcast %struct.S* [[ARRAYIDX6]] to i8* +// CHECK9-NEXT: [[TMP31:%.*]] = bitcast %struct.S* [[TMP28]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i64 4, i1 false), !llvm.access.group !9 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK9-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK9-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK9-NEXT: br i1 [[TMP38]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP29]], i32* [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK9-NEXT: [[TMP31:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP32:%.*]] = bitcast [2 x %struct.S]* [[S_ARR5]] to %struct.S* -// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN13]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN13]], [[TMP33]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP39]], i32* [[TMP8]], align 4 +// CHECK9-NEXT: [[TMP40:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK9-NEXT: [[TMP41:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP40]], i8* align 4 [[TMP41]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP10]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP42:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK9-NEXT: [[TMP43:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN8]], [[TMP43]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP32]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN13]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK9-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP42]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[TMP44:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK9-NEXT: [[TMP45:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP44]], i8* align 4 [[TMP45]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP33]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done14: -// CHECK9-NEXT: [[TMP36:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP37:%.*]] = bitcast %struct.S* [[TMP7]] to i8* -// CHECK9-NEXT: [[TMP38:%.*]] = bitcast %struct.S* [[TMP36]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP37]], i8* align 4 [[TMP38]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK9-NEXT: store i32 [[TMP39]], i32* [[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP43]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP46:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP47:%.*]] = bitcast %struct.S* [[TMP17]] to i8* +// CHECK9-NEXT: [[TMP48:%.*]] = bitcast %struct.S* [[TMP46]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP47]], i8* align 4 [[TMP48]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP49:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP49]], i32* [[TMP14]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN15:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN15]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP40]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP50]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN15]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE16:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done16: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -1313,35 +1387,35 @@ // CHECK9-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x i8*], align 8 // CHECK9-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x i8*], align 8 // CHECK9-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x i8*], align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) -// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) -// CHECK9-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[TMP1]], %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) +// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) +// CHECK9-NEXT: store %struct.S.1* [[TEST]], %struct.S.1** [[VAR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[TMP1]], %struct.S.1** [[TMP]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_VAR]], align 4 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64* // CHECK9-NEXT: store i64 [[TMP3]], i64* [[TMP8]], align 8 @@ -1359,19 +1433,19 @@ // CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 // CHECK9-NEXT: store i8* null, i8** [[TMP16]], align 8 // CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK9-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.0]** -// CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP18]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.1]** +// CHECK9-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP18]], align 8 // CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK9-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to [2 x %struct.S.0]** -// CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP20]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to [2 x %struct.S.1]** +// CHECK9-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP20]], align 8 // CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 // CHECK9-NEXT: store i8* null, i8** [[TMP21]], align 8 // CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK9-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.0** -// CHECK9-NEXT: store %struct.S.0* [[TMP5]], %struct.S.0** [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.1** +// CHECK9-NEXT: store %struct.S.1* [[TMP5]], %struct.S.1** [[TMP23]], align 8 // CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK9-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to %struct.S.0** -// CHECK9-NEXT: store %struct.S.0* [[TMP6]], %struct.S.0** [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to %struct.S.1** +// CHECK9-NEXT: store %struct.S.1* [[TMP6]], %struct.S.1** [[TMP25]], align 8 // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 // CHECK9-NEXT: store i8* null, i8** [[TMP26]], align 8 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 @@ -1381,21 +1455,21 @@ // CHECK9-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 // CHECK9-NEXT: br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: -// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49(i64 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP4]]) #[[ATTR4]] +// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49(i64 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.1]* [[S_ARR]], %struct.S.1* [[TMP4]]) #[[ATTR4]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: // CHECK9-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK9: arraydestroy.done2: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK9-NEXT: ret i32 [[TMP32]] // @@ -1435,407 +1509,431 @@ // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK9-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) +// CHECK9-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49 -// CHECK9-SAME: (i64 noundef [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i64 noundef [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.1]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.1]*, align 8 +// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 +// CHECK9-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[S_ARR_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[VAR_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[CONV]], [2 x i32]* [[TMP0]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP3]]) +// CHECK9-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[S_ARR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[TMP2]], %struct.S.1** [[TMP]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [2 x %struct.S.1]* [[TMP1]], [2 x %struct.S.1]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[TMP7]], %struct.S.1** [[TMP6]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[_TMP1]], align 8 +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP7]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[TMP8]], %struct.S.1** [[TMP]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[TMP9]], %struct.S.1** [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: -// CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP1]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store %struct.S.0* [[VAR6]], %struct.S.0** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP1]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[TMP17:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8, !llvm.access.group !14 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i32*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP14]], i64 [[TMP16]], [2 x i32]* [[VEC4]], i32* [[T_VAR3]], [2 x %struct.S.0]* [[S_ARR5]], %struct.S.0* [[TMP17]]), !llvm.access.group !14 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK9-NEXT: store i64 [[TMP19]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP22]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP23]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP24]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[TMP25]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP26]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP28:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP3]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: store %struct.S.1* [[TMP28]], %struct.S.1** [[TMP27]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !14 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK9-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK9-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP26]], i32* [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK9-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP27]], i8* align 4 [[TMP28]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP29:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR5]] to %struct.S.0* -// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN9]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN9]], [[TMP30]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP37]], i32* [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK9-NEXT: [[TMP39:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP40:%.*]] = bitcast [2 x %struct.S.1]* [[S_ARR]] to %struct.S.1* +// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN5]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN5]], [[TMP41]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP29]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[TMP31:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK9-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP31]], i8* align 4 [[TMP32]], i64 4, i1 false) -// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP30]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done10: -// CHECK9-NEXT: [[TMP33:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP34:%.*]] = bitcast %struct.S.0* [[TMP5]] to i8* -// CHECK9-NEXT: [[TMP35:%.*]] = bitcast %struct.S.0* [[TMP33]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP40]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[TMP42:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK9-NEXT: [[TMP43:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP42]], i8* align 4 [[TMP43]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP41]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done6: +// CHECK9-NEXT: [[TMP44:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP45:%.*]] = bitcast %struct.S.1* [[TMP10]] to i8* +// CHECK9-NEXT: [[TMP46:%.*]] = bitcast %struct.S.1* [[TMP44]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP45]], i8* align 4 [[TMP46]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN11]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN7]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP36]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done12: +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP47]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done8: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP11]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[TMP12]], %struct.S.1** [[TMP]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: -// CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP6:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store %struct.S.0* [[VAR6]], %struct.S.0** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP17:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i64 0, i64 [[IDXPROM9]] -// CHECK9-NEXT: [[TMP19:%.*]] = bitcast %struct.S.0* [[ARRAYIDX10]] to i8* -// CHECK9-NEXT: [[TMP20:%.*]] = bitcast %struct.S.0* [[TMP17]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i64 4, i1 false), !llvm.access.group !17 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP24]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP26:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP3]], align 8, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: [[TMP28:%.*]] = bitcast %struct.S.1* [[ARRAYIDX6]] to i8* +// CHECK9-NEXT: [[TMP29:%.*]] = bitcast %struct.S.1* [[TMP26]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i64 4, i1 false), !llvm.access.group !17 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK9-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK9-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK9-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK9-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP28]], i32* [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK9-NEXT: [[TMP30:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP29]], i8* align 4 [[TMP30]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP31:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR5]] to %struct.S.0* -// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN12]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN12]], [[TMP32]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP37]], i32* [[TMP8]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK9-NEXT: [[TMP39:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP10]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP40:%.*]] = bitcast [2 x %struct.S.1]* [[S_ARR]] to %struct.S.1* +// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN8]], [[TMP41]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[TMP33:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK9-NEXT: [[TMP34:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i64 4, i1 false) -// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP32]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done13: -// CHECK9-NEXT: [[TMP35:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP36:%.*]] = bitcast %struct.S.0* [[TMP6]] to i8* -// CHECK9-NEXT: [[TMP37:%.*]] = bitcast %struct.S.0* [[TMP35]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP36]], i8* align 4 [[TMP37]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP40]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[TMP42:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK9-NEXT: [[TMP43:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP42]], i8* align 4 [[TMP43]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP41]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP44:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP45:%.*]] = bitcast %struct.S.1* [[TMP15]] to i8* +// CHECK9-NEXT: [[TMP46:%.*]] = bitcast %struct.S.1* [[TMP44]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP45]], i8* align 4 [[TMP46]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN14]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP38]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done15: +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP47]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: store i32 0, i32* [[F]], align 4 // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1986,6 +2084,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK11-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 @@ -1995,21 +2094,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 // CHECK11-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[T_VAR_ADDR]], [2 x i32]* [[TMP0]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP3]], i32* [[SVAR_ADDR]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[T_VAR_ADDR]], i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP7]], %struct.S** [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32* [[SVAR_ADDR]], i32** [[TMP8]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2018,33 +2123,38 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP7:%.*]] = alloca %struct.S*, align 4 -// CHECK11-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca %struct.S*, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: store %struct.S* [[TMP5]], %struct.S** [[_TMP1]], align 4 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP11]], %struct.S** [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -2054,117 +2164,127 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP6:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK11-NEXT: store %struct.S* [[VAR6]], %struct.S** [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP16:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], [2 x i32]* [[VEC4]], i32* [[T_VAR3]], [2 x %struct.S]* [[S_ARR5]], %struct.S* [[TMP16]], i32* [[SVAR8]]), !llvm.access.group !6 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP22]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP23]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP24]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[TMP25]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP26]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP28:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: store %struct.S* [[TMP28]], %struct.S** [[TMP27]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store i32* [[SVAR]], i32** [[TMP29]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !6 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK11-NEXT: br i1 [[TMP37]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK11-NEXT: store i32 [[TMP25]], i32* [[TMP0]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK11-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP28:%.*]] = bitcast [2 x %struct.S]* [[S_ARR5]] to %struct.S* -// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN10]], [[TMP29]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP38]], i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK11-NEXT: [[TMP40:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP39]], i8* align 4 [[TMP40]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP41:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK11-NEXT: [[TMP42:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN5]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN5]], [[TMP42]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP28]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[TMP30:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK11-NEXT: [[TMP31:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP41]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[TMP43:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK11-NEXT: [[TMP44:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP43]], i8* align 4 [[TMP44]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP29]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done11: -// CHECK11-NEXT: [[TMP32:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[TMP6]] to i8* -// CHECK11-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[TMP32]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK11-NEXT: store i32 [[TMP35]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP42]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done6: +// CHECK11-NEXT: [[TMP45:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP46:%.*]] = bitcast %struct.S* [[TMP12]] to i8* +// CHECK11-NEXT: [[TMP47:%.*]] = bitcast %struct.S* [[TMP45]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP46]], i8* align 4 [[TMP47]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP48]], i32* [[TMP10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP36]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP49]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done8: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2172,37 +2292,41 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca %struct.S*, align 4 -// CHECK11-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca %struct.S*, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[TMP13]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP12]], %struct.S** [[TMP]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -2212,113 +2336,113 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store %struct.S* [[VAR5]], %struct.S** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC3]], i32 0, i32 [[TMP17]] -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP18:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 [[TMP19]] -// CHECK11-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX9]] to i8* -// CHECK11-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[TMP18]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false), !llvm.access.group !10 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP27]] +// CHECK11-NEXT: store i32 [[TMP26]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP28:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP29]] +// CHECK11-NEXT: [[TMP30:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* +// CHECK11-NEXT: [[TMP31:%.*]] = bitcast %struct.S* [[TMP28]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i32 4, i1 false), !llvm.access.group !10 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK11-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK11-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK11-NEXT: br i1 [[TMP38]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP29]], i32* [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK11-NEXT: [[TMP31:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP32:%.*]] = bitcast [2 x %struct.S]* [[S_ARR4]] to %struct.S* -// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN11]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN11]], [[TMP33]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP39]], i32* [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK11-NEXT: [[TMP41:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP40]], i8* align 4 [[TMP41]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP10]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP42:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK11-NEXT: [[TMP43:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN6]], [[TMP43]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP32]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK11-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP42]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[TMP44:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK11-NEXT: [[TMP45:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP44]], i8* align 4 [[TMP45]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP33]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done12: -// CHECK11-NEXT: [[TMP36:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP37:%.*]] = bitcast %struct.S* [[TMP7]] to i8* -// CHECK11-NEXT: [[TMP38:%.*]] = bitcast %struct.S* [[TMP36]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP37]], i8* align 4 [[TMP38]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[SVAR7]], align 4 -// CHECK11-NEXT: store i32 [[TMP39]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP43]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP46:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP47:%.*]] = bitcast %struct.S* [[TMP17]] to i8* +// CHECK11-NEXT: [[TMP48:%.*]] = bitcast %struct.S* [[TMP46]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP47]], i8* align 4 [[TMP48]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP49:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP49]], i32* [[TMP14]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP40]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP50]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // @@ -2336,34 +2460,34 @@ // CHECK11-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x i8*], align 4 // CHECK11-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x i8*], align 4 // CHECK11-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x i8*], align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i32 8, i1 false) -// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i32 1 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) -// CHECK11-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[TMP1]], %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i32 1 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK11-NEXT: store %struct.S.1* [[TEST]], %struct.S.1** [[VAR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[TMP1]], %struct.S.1** [[TMP]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_VAR]], align 4 // CHECK11-NEXT: store i32 [[TMP2]], i32* [[T_VAR_CASTED]], align 4 // CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32* // CHECK11-NEXT: store i32 [[TMP3]], i32* [[TMP8]], align 4 @@ -2381,19 +2505,19 @@ // CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 // CHECK11-NEXT: store i8* null, i8** [[TMP16]], align 4 // CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.0]** -// CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP18]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.1]** +// CHECK11-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP18]], align 4 // CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to [2 x %struct.S.0]** -// CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP20]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to [2 x %struct.S.1]** +// CHECK11-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP20]], align 4 // CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 // CHECK11-NEXT: store i8* null, i8** [[TMP21]], align 4 // CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK11-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.0** -// CHECK11-NEXT: store %struct.S.0* [[TMP5]], %struct.S.0** [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.1** +// CHECK11-NEXT: store %struct.S.1* [[TMP5]], %struct.S.1** [[TMP23]], align 4 // CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK11-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to %struct.S.0** -// CHECK11-NEXT: store %struct.S.0* [[TMP6]], %struct.S.0** [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to %struct.S.1** +// CHECK11-NEXT: store %struct.S.1* [[TMP6]], %struct.S.1** [[TMP25]], align 4 // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 // CHECK11-NEXT: store i8* null, i8** [[TMP26]], align 4 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 @@ -2403,21 +2527,21 @@ // CHECK11-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 // CHECK11-NEXT: br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: -// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49(i32 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP4]]) #[[ATTR4]] +// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49(i32 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.1]* [[S_ARR]], %struct.S.1* [[TMP4]]) #[[ATTR4]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: // CHECK11-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK11: arraydestroy.done2: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK11-NEXT: ret i32 [[TMP32]] // @@ -2457,400 +2581,424 @@ // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK11-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49 -// CHECK11-SAME: (i32 noundef [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32 noundef [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.1]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.1]*, align 4 +// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 +// CHECK11-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[S_ARR_ADDR]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[VAR_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[T_VAR_ADDR]], [2 x i32]* [[TMP0]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP3]]) +// CHECK11-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[S_ARR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR_ADDR]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[TMP2]], %struct.S.1** [[TMP]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[T_VAR_ADDR]], i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x %struct.S.1]* [[TMP1]], [2 x %struct.S.1]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[TMP7]], %struct.S.1** [[TMP6]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[_TMP1]], align 4 +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP7]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[TMP8]], %struct.S.1** [[TMP]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[TMP9]], %struct.S.1** [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: -// CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i32 1 -// CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP1]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK11-NEXT: store %struct.S.0* [[VAR6]], %struct.S.0** [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP1]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP15:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP13]], i32 [[TMP14]], [2 x i32]* [[VEC4]], i32* [[T_VAR3]], [2 x %struct.S.0]* [[S_ARR5]], %struct.S.0* [[TMP15]]), !llvm.access.group !15 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP20]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP21]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP22]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[TMP23]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP24]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP26:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP3]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: store %struct.S.1* [[TMP26]], %struct.S.1** [[TMP25]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !15 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK11-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK11-NEXT: store i32 [[TMP24]], i32* [[TMP0]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK11-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP25]], i8* align 4 [[TMP26]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP27:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR5]] to %struct.S.0* -// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN9]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN9]], [[TMP28]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP35]], i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK11-NEXT: [[TMP37:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 [[TMP37]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP38:%.*]] = bitcast [2 x %struct.S.1]* [[S_ARR]] to %struct.S.1* +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN5]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN5]], [[TMP39]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP27]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[TMP29:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK11-NEXT: [[TMP30:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP29]], i8* align 4 [[TMP30]], i32 4, i1 false) -// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP28]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done10: -// CHECK11-NEXT: [[TMP31:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[TMP5]] to i8* -// CHECK11-NEXT: [[TMP33:%.*]] = bitcast %struct.S.0* [[TMP31]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP38]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[TMP40:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK11-NEXT: [[TMP41:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP40]], i8* align 4 [[TMP41]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP39]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done6: +// CHECK11-NEXT: [[TMP42:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = bitcast %struct.S.1* [[TMP10]] to i8* +// CHECK11-NEXT: [[TMP44:%.*]] = bitcast %struct.S.1* [[TMP42]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP43]], i8* align 4 [[TMP44]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN11]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN7]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP34]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done12: +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP45]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done8: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP11]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[TMP12]], %struct.S.1** [[TMP]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: -// CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i32 1 -// CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP6:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC3]], i32 0, i32 [[TMP16]] -// CHECK11-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP17:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 [[TMP18]] -// CHECK11-NEXT: [[TMP19:%.*]] = bitcast %struct.S.0* [[ARRAYIDX8]] to i8* -// CHECK11-NEXT: [[TMP20:%.*]] = bitcast %struct.S.0* [[TMP17]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i32 4, i1 false), !llvm.access.group !18 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP25]] +// CHECK11-NEXT: store i32 [[TMP24]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP26:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP2]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 [[TMP27]] +// CHECK11-NEXT: [[TMP28:%.*]] = bitcast %struct.S.1* [[ARRAYIDX4]] to i8* +// CHECK11-NEXT: [[TMP29:%.*]] = bitcast %struct.S.1* [[TMP26]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 4, i1 false), !llvm.access.group !18 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK11-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK11-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK11-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP28]], i32* [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK11-NEXT: [[TMP30:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP29]], i8* align 4 [[TMP30]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP31:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR4]] to %struct.S.0* -// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN10]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN10]], [[TMP32]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP37]], i32* [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK11-NEXT: [[TMP39:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP10]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP40:%.*]] = bitcast [2 x %struct.S.1]* [[S_ARR]] to %struct.S.1* +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN6]], [[TMP41]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[TMP33:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK11-NEXT: [[TMP34:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i32 4, i1 false) -// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP32]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done11: -// CHECK11-NEXT: [[TMP35:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = bitcast %struct.S.0* [[TMP6]] to i8* -// CHECK11-NEXT: [[TMP37:%.*]] = bitcast %struct.S.0* [[TMP35]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 [[TMP37]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP40]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[TMP42:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK11-NEXT: [[TMP43:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP42]], i8* align 4 [[TMP43]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP41]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP44:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP45:%.*]] = bitcast %struct.S.1* [[TMP15]] to i8* +// CHECK11-NEXT: [[TMP46:%.*]] = bitcast %struct.S.1* [[TMP44]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP45]], i8* align 4 [[TMP46]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP38]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP47]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: store i32 0, i32* [[F]], align 4 // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp @@ -203,71 +203,84 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK1-SAME: () #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group !9 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !9 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 2), !llvm.access.group !9 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !9 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !9 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -276,12 +289,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -291,43 +303,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK1-NEXT: invoke void @_Z3foov() @@ -337,27 +353,27 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR10:[0-9]+]], !llvm.access.group !13 +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR10:[0-9]+]], !llvm.access.group !13 // CHECK1-NEXT: unreachable // // @@ -372,78 +388,92 @@ // CHECK1-SAME: (i64 [[A:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i8* [[CONV]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i8* [[CONV]], i8** [[TMP0]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8* nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i8* [[A]], i8** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i8*, i8** [[A_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP0]], align 1, !llvm.access.group !18 -// CHECK1-NEXT: [[TMP9:%.*]] = sext i8 [[TMP8]] to i32 -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 [[TMP9]]), !llvm.access.group !18 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP10:%.*]] = load i8, i8* [[TMP2]], align 1, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]), !llvm.access.group !18 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !18 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP16]], align 8, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP17]], align 8, !llvm.access.group !18 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !18 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -452,12 +482,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -467,43 +496,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 // CHECK1-NEXT: invoke void @_Z3foov() @@ -513,27 +546,27 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR10]], !llvm.access.group !21 +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR10]], !llvm.access.group !21 // CHECK1-NEXT: unreachable // // @@ -621,71 +654,84 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 5), !llvm.access.group !24 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !24 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 5), !llvm.access.group !24 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !24 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !24 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -694,12 +740,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -709,43 +754,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 // CHECK1-NEXT: invoke void @_Z3foov() @@ -755,98 +804,111 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR10]], !llvm.access.group !27 +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR10]], !llvm.access.group !27 // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 23), !llvm.access.group !30 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 23), !llvm.access.group !30 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !30 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !30 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -855,12 +917,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -870,43 +931,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 // CHECK1-NEXT: invoke void @_Z3foov() @@ -916,98 +981,111 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR10]], !llvm.access.group !33 +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR10]], !llvm.access.group !33 // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 1), !llvm.access.group !36 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !36 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1), !llvm.access.group !36 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !36 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !36 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1016,12 +1094,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1031,43 +1108,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 // CHECK1-NEXT: invoke void @_Z3foov() @@ -1077,77 +1158,84 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR10]], !llvm.access.group !39 +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR10]], !llvm.access.group !39 // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: // CHECK1-NEXT: invoke void @_ZN1SC1El(%struct.S* nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 23) @@ -1156,48 +1244,53 @@ // CHECK1-NEXT: [[CALL:%.*]] = invoke signext i8 @_ZN1ScvcEv(%struct.S* nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK1-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[TERMINATE_LPAD]], !llvm.access.group !42 // CHECK1: invoke.cont2: -// CHECK1-NEXT: [[TMP7:%.*]] = sext i8 [[CALL]] to i32 -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP7]]), !llvm.access.group !42 +// CHECK1-NEXT: [[TMP8:%.*]] = sext i8 [[CALL]] to i32 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 [[TMP8]]), !llvm.access.group !42 // CHECK1-NEXT: call void @_ZN1SD1Ev(%struct.S* nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]], !llvm.access.group !42 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 -// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !42 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK1-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i64 [[TMP12]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP13]], align 8, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP14]], align 8, !llvm.access.group !42 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !42 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP19:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR10]], !llvm.access.group !42 +// CHECK1-NEXT: [[TMP20:%.*]] = extractvalue { i8*, i32 } [[TMP19]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP20]]) #[[ATTR10]], !llvm.access.group !42 // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1207,43 +1300,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 // CHECK1-NEXT: invoke void @_Z3foov() @@ -1253,27 +1350,27 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR10]], !llvm.access.group !45 +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR10]], !llvm.access.group !45 // CHECK1-NEXT: unreachable // // @@ -1733,71 +1830,84 @@ // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK5-SAME: () #[[ATTR3:[0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group !9 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !9 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 2), !llvm.access.group !9 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !9 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !9 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1806,12 +1916,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1821,43 +1930,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK5-NEXT: invoke void @_Z3foov() @@ -1867,27 +1980,27 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR10:[0-9]+]], !llvm.access.group !13 +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR10:[0-9]+]], !llvm.access.group !13 // CHECK5-NEXT: unreachable // // @@ -1902,78 +2015,92 @@ // CHECK5-SAME: (i64 [[A:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i8* [[CONV]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i8* [[CONV]], i8** [[TMP0]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8* nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i8*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i8* [[A]], i8** [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i8*, i8** [[A_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i8*, i8** [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP0]], align 1, !llvm.access.group !18 -// CHECK5-NEXT: [[TMP9:%.*]] = sext i8 [[TMP8]] to i32 -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 [[TMP9]]), !llvm.access.group !18 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP10:%.*]] = load i8, i8* [[TMP2]], align 1, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]), !llvm.access.group !18 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !18 +// CHECK5-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK5-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP16]], align 8, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP17]], align 8, !llvm.access.group !18 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !18 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK5-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1982,12 +2109,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1997,43 +2123,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 // CHECK5-NEXT: invoke void @_Z3foov() @@ -2043,27 +2173,27 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR10]], !llvm.access.group !21 +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR10]], !llvm.access.group !21 // CHECK5-NEXT: unreachable // // @@ -2142,71 +2272,84 @@ // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 5), !llvm.access.group !24 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !24 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 5), !llvm.access.group !24 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !24 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !24 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2215,12 +2358,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2230,43 +2372,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 // CHECK5-NEXT: invoke void @_Z3foov() @@ -2276,98 +2422,111 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR10]], !llvm.access.group !27 +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR10]], !llvm.access.group !27 // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*)) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 23), !llvm.access.group !30 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 23), !llvm.access.group !30 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !30 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !30 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !30 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !30 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !30 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2376,12 +2535,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2391,43 +2549,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 // CHECK5-NEXT: invoke void @_Z3foov() @@ -2437,98 +2599,111 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR10]], !llvm.access.group !33 +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR10]], !llvm.access.group !33 // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*)) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 1), !llvm.access.group !36 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !36 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1), !llvm.access.group !36 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !36 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !36 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !36 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !36 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !36 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2537,12 +2712,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2552,43 +2726,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 // CHECK5-NEXT: invoke void @_Z3foov() @@ -2598,77 +2776,84 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR10]], !llvm.access.group !39 +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR10]], !llvm.access.group !39 // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*)) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: // CHECK5-NEXT: invoke void @_ZN1SC1El(%struct.S* nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 23) @@ -2677,48 +2862,53 @@ // CHECK5-NEXT: [[CALL:%.*]] = invoke signext i8 @_ZN1ScvcEv(%struct.S* nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK5-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[TERMINATE_LPAD]], !llvm.access.group !42 // CHECK5: invoke.cont2: -// CHECK5-NEXT: [[TMP7:%.*]] = sext i8 [[CALL]] to i32 -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP7]]), !llvm.access.group !42 +// CHECK5-NEXT: [[TMP8:%.*]] = sext i8 [[CALL]] to i32 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 [[TMP8]]), !llvm.access.group !42 // CHECK5-NEXT: call void @_ZN1SD1Ev(%struct.S* nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]], !llvm.access.group !42 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 -// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !42 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK5-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !42 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK5-NEXT: store i64 [[TMP12]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !42 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP13]], align 8, !llvm.access.group !42 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP14]], align 8, !llvm.access.group !42 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !42 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK5-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } +// CHECK5-NEXT: [[TMP19:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null -// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR10]], !llvm.access.group !42 +// CHECK5-NEXT: [[TMP20:%.*]] = extractvalue { i8*, i32 } [[TMP19]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP20]]) #[[ATTR10]], !llvm.access.group !42 // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2728,43 +2918,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 // CHECK5-NEXT: invoke void @_Z3foov() @@ -2774,27 +2968,27 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR10]], !llvm.access.group !45 +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR10]], !llvm.access.group !45 // CHECK5-NEXT: unreachable // // @@ -2924,71 +3118,84 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK9-SAME: () #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group !13 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !13 +// CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 2), !llvm.access.group !13 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !13 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !13 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2997,12 +3204,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3012,43 +3218,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 // CHECK9-NEXT: invoke void @_Z3foov() @@ -3058,27 +3268,27 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK9-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK9-NEXT: catch i8* null -// CHECK9-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR10:[0-9]+]], !llvm.access.group !17 +// CHECK9-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR10:[0-9]+]], !llvm.access.group !17 // CHECK9-NEXT: unreachable // // @@ -3093,78 +3303,92 @@ // CHECK9-SAME: (i64 [[A:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i8* [[CONV]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i8* [[CONV]], i8** [[TMP0]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8* nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i8*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i8* [[A]], i8** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i8*, i8** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i8*, i8** [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP0]], align 1, !llvm.access.group !22 -// CHECK9-NEXT: [[TMP9:%.*]] = sext i8 [[TMP8]] to i32 -// CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 [[TMP9]]), !llvm.access.group !22 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 -// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP10:%.*]] = load i8, i8* [[TMP2]], align 1, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]), !llvm.access.group !22 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 // CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !22 +// CHECK9-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK9-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP16]], align 8, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP17]], align 8, !llvm.access.group !22 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !22 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK9-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3173,12 +3397,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3188,43 +3411,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 // CHECK9-NEXT: invoke void @_Z3foov() @@ -3234,27 +3461,27 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK9-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK9-NEXT: catch i8* null -// CHECK9-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR10]], !llvm.access.group !25 +// CHECK9-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR10]], !llvm.access.group !25 // CHECK9-NEXT: unreachable // // @@ -3342,71 +3569,84 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 5), !llvm.access.group !28 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !28 +// CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 5), !llvm.access.group !28 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !28 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !28 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3415,12 +3655,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3430,43 +3669,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !31 // CHECK9-NEXT: invoke void @_Z3foov() @@ -3476,98 +3719,111 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK9-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK9-NEXT: catch i8* null -// CHECK9-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR10]], !llvm.access.group !31 +// CHECK9-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR10]], !llvm.access.group !31 // CHECK9-NEXT: unreachable // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 23), !llvm.access.group !34 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !34 +// CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 23), !llvm.access.group !34 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !34 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !34 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !34 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !34 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !34 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3576,12 +3832,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3591,43 +3846,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !37 // CHECK9-NEXT: invoke void @_Z3foov() @@ -3637,98 +3896,111 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK9-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK9-NEXT: catch i8* null -// CHECK9-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR10]], !llvm.access.group !37 +// CHECK9-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR10]], !llvm.access.group !37 // CHECK9-NEXT: unreachable // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 1), !llvm.access.group !40 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !40 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !40 +// CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1), !llvm.access.group !40 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !40 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !40 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !40 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !40 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !40 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !40 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !40 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !40 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3737,12 +4009,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3752,43 +4023,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 // CHECK9-NEXT: invoke void @_Z3foov() @@ -3798,77 +4073,84 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK9-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK9-NEXT: catch i8* null -// CHECK9-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR10]], !llvm.access.group !43 +// CHECK9-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR10]], !llvm.access.group !43 // CHECK9-NEXT: unreachable // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: // CHECK9-NEXT: invoke void @_ZN1SC1El(%struct.S* nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 23) @@ -3877,48 +4159,53 @@ // CHECK9-NEXT: [[CALL:%.*]] = invoke i8 @_ZN1ScvcEv(%struct.S* nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK9-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[TERMINATE_LPAD]], !llvm.access.group !46 // CHECK9: invoke.cont2: -// CHECK9-NEXT: [[TMP7:%.*]] = sext i8 [[CALL]] to i32 -// CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP7]]), !llvm.access.group !46 +// CHECK9-NEXT: [[TMP8:%.*]] = sext i8 [[CALL]] to i32 +// CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 [[TMP8]]), !llvm.access.group !46 // CHECK9-NEXT: call void @_ZN1SD1Ev(%struct.S* nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]], !llvm.access.group !46 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 -// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !46 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 +// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK9-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !46 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 +// CHECK9-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK9-NEXT: store i64 [[TMP12]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !46 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP13]], align 8, !llvm.access.group !46 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP14]], align 8, !llvm.access.group !46 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !46 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK9-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } +// CHECK9-NEXT: [[TMP19:%.*]] = landingpad { i8*, i32 } // CHECK9-NEXT: catch i8* null -// CHECK9-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR10]], !llvm.access.group !46 +// CHECK9-NEXT: [[TMP20:%.*]] = extractvalue { i8*, i32 } [[TMP19]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(i8* [[TMP20]]) #[[ATTR10]], !llvm.access.group !46 // CHECK9-NEXT: unreachable // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3928,43 +4215,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49 // CHECK9-NEXT: invoke void @_Z3foov() @@ -3974,27 +4265,27 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK9-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK9-NEXT: catch i8* null -// CHECK9-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR10]], !llvm.access.group !49 +// CHECK9-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR10]], !llvm.access.group !49 // CHECK9-NEXT: unreachable // // @@ -4454,71 +4745,84 @@ // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK13-SAME: () #[[ATTR3:[0-9]+]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK13-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK13-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group !13 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !13 +// CHECK13-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 2), !llvm.access.group !13 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !13 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !13 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK13-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4527,12 +4831,11 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK13-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK13-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4542,43 +4845,47 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 // CHECK13-NEXT: invoke void @_Z3foov() @@ -4588,27 +4895,27 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK13-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK13-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK13-NEXT: catch i8* null -// CHECK13-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR10:[0-9]+]], !llvm.access.group !17 +// CHECK13-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR10:[0-9]+]], !llvm.access.group !17 // CHECK13-NEXT: unreachable // // @@ -4623,78 +4930,92 @@ // CHECK13-SAME: (i64 [[A:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK13-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i8* [[CONV]]) +// CHECK13-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i8* [[CONV]], i8** [[TMP0]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK13-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8* nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR3]] { +// CHECK13-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i8*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i8* [[A]], i8** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i8*, i8** [[A_ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i8*, i8** [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP0]], align 1, !llvm.access.group !22 -// CHECK13-NEXT: [[TMP9:%.*]] = sext i8 [[TMP8]] to i32 -// CHECK13-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 [[TMP9]]), !llvm.access.group !22 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 -// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP10:%.*]] = load i8, i8* [[TMP2]], align 1, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK13-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]), !llvm.access.group !22 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 // CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !22 +// CHECK13-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK13-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP16]], align 8, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP17]], align 8, !llvm.access.group !22 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !22 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK13-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4703,12 +5024,11 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK13-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK13-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4718,43 +5038,47 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 // CHECK13-NEXT: invoke void @_Z3foov() @@ -4764,27 +5088,27 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK13-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK13-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK13-NEXT: catch i8* null -// CHECK13-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR10]], !llvm.access.group !25 +// CHECK13-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR10]], !llvm.access.group !25 // CHECK13-NEXT: unreachable // // @@ -4863,71 +5187,84 @@ // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52 // CHECK13-SAME: () #[[ATTR3]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK13-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK13-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 5), !llvm.access.group !28 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !28 +// CHECK13-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 5), !llvm.access.group !28 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !28 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !28 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK13-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4936,12 +5273,11 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK13-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK13-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4951,43 +5287,47 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !31 // CHECK13-NEXT: invoke void @_Z3foov() @@ -4997,98 +5337,111 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK13-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK13-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK13-NEXT: catch i8* null -// CHECK13-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR10]], !llvm.access.group !31 +// CHECK13-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR10]], !llvm.access.group !31 // CHECK13-NEXT: unreachable // // // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57 // CHECK13-SAME: () #[[ATTR3]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*)) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK13-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK13-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 23), !llvm.access.group !34 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !34 +// CHECK13-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 23), !llvm.access.group !34 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !34 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !34 +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !34 +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !34 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !34 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK13-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5097,12 +5450,11 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK13-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK13-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5112,43 +5464,47 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !37 // CHECK13-NEXT: invoke void @_Z3foov() @@ -5158,98 +5514,111 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK13-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK13-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK13-NEXT: catch i8* null -// CHECK13-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR10]], !llvm.access.group !37 +// CHECK13-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR10]], !llvm.access.group !37 // CHECK13-NEXT: unreachable // // // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52 // CHECK13-SAME: () #[[ATTR3]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*)) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK13-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK13-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 1), !llvm.access.group !40 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !40 -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !40 +// CHECK13-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1), !llvm.access.group !40 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !40 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !40 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !40 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK13-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5258,12 +5627,11 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK13-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK13-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5273,43 +5641,47 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 // CHECK13-NEXT: invoke void @_Z3foov() @@ -5319,77 +5691,84 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK13-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK13-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK13-NEXT: catch i8* null -// CHECK13-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR10]], !llvm.access.group !43 +// CHECK13-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR10]], !llvm.access.group !43 // CHECK13-NEXT: unreachable // // // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57 // CHECK13-SAME: () #[[ATTR3]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*)) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK13-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK13-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: // CHECK13-NEXT: invoke void @_ZN1SC1El(%struct.S* nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 23) @@ -5398,48 +5777,53 @@ // CHECK13-NEXT: [[CALL:%.*]] = invoke i8 @_ZN1ScvcEv(%struct.S* nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK13-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[TERMINATE_LPAD]], !llvm.access.group !46 // CHECK13: invoke.cont2: -// CHECK13-NEXT: [[TMP7:%.*]] = sext i8 [[CALL]] to i32 -// CHECK13-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP7]]), !llvm.access.group !46 +// CHECK13-NEXT: [[TMP8:%.*]] = sext i8 [[CALL]] to i32 +// CHECK13-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 [[TMP8]]), !llvm.access.group !46 // CHECK13-NEXT: call void @_ZN1SD1Ev(%struct.S* nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]], !llvm.access.group !46 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 -// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !46 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 +// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK13-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !46 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 +// CHECK13-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK13-NEXT: store i64 [[TMP12]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !46 +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP13]], align 8, !llvm.access.group !46 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP14]], align 8, !llvm.access.group !46 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !46 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK13-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK13-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } +// CHECK13-NEXT: [[TMP19:%.*]] = landingpad { i8*, i32 } // CHECK13-NEXT: catch i8* null -// CHECK13-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR10]], !llvm.access.group !46 +// CHECK13-NEXT: [[TMP20:%.*]] = extractvalue { i8*, i32 } [[TMP19]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(i8* [[TMP20]]) #[[ATTR10]], !llvm.access.group !46 // CHECK13-NEXT: unreachable // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK13-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK13-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5449,43 +5833,47 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49 // CHECK13-NEXT: invoke void @_Z3foov() @@ -5495,27 +5883,27 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK13-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK13-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK13-NEXT: catch i8* null -// CHECK13-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR10]], !llvm.access.group !49 +// CHECK13-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR10]], !llvm.access.group !49 // CHECK13-NEXT: unreachable // // diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp @@ -151,15 +151,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK1-SAME: () #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca double*, align 8 @@ -167,62 +169,73 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store double* undef, double** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: store double* [[G1]], double** [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -231,12 +244,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca double*, align 8 @@ -250,80 +262,84 @@ // CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store double* undef, double** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: store double* [[G1]], double** [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 // CHECK1-NEXT: store double 1.000000e+00, double* [[G]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP10:%.*]] = load double*, double** [[_TMP3]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP10]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP15:%.*]] = load double*, double** [[_TMP3]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP15]], align 8, !llvm.access.group !8 // CHECK1-NEXT: store i32 3, i32* [[SVAR]], align 4, !llvm.access.group !8 // CHECK1-NEXT: store float 4.000000e+00, float* [[SFVAR]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double* [[G]], double** [[TMP11]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load double*, double** [[_TMP3]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: store double* [[TMP13]], double** [[TMP12]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store i32* [[SVAR]], i32** [[TMP14]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store float* [[SFVAR]], float** [[TMP15]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store double* [[G]], double** [[TMP16]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load double*, double** [[_TMP3]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: store double* [[TMP18]], double** [[TMP17]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[SVAR]], i32** [[TMP19]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[SFVAR]], float** [[TMP20]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -354,15 +370,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK3-SAME: () #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca double*, align 4 @@ -370,60 +388,71 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store double* undef, double** [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: store double* [[G1]], double** [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group !5 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP10]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP11]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !5 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -432,12 +461,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca double*, align 4 @@ -451,78 +479,82 @@ // CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK3-NEXT: store double* undef, double** [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: store double* [[G1]], double** [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 // CHECK3-NEXT: store double 1.000000e+00, double* [[G]], align 8, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP10:%.*]] = load double*, double** [[_TMP2]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP10]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP15:%.*]] = load double*, double** [[_TMP2]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP15]], align 4, !llvm.access.group !9 // CHECK3-NEXT: store i32 3, i32* [[SVAR]], align 4, !llvm.access.group !9 // CHECK3-NEXT: store float 4.000000e+00, float* [[SFVAR]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double* [[G]], double** [[TMP11]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP13:%.*]] = load double*, double** [[_TMP2]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: store double* [[TMP13]], double** [[TMP12]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store i32* [[SVAR]], i32** [[TMP14]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store float* [[SFVAR]], float** [[TMP15]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !9 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G]], double** [[TMP16]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load double*, double** [[_TMP2]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store double* [[TMP18]], double** [[TMP17]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SVAR]], i32** [[TMP19]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[SFVAR]], float** [[TMP20]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -639,15 +671,17 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l95 // CHECK9-SAME: () #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S*, align 8 @@ -655,6 +689,8 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 @@ -662,8 +698,11 @@ // CHECK9-NEXT: [[_TMP2:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store %struct.S* undef, %struct.S** [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 @@ -681,62 +720,68 @@ // CHECK9: arrayctor.cont: // CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !5 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !5 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]]) -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK9-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN4]], i64 2 +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN4]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] @@ -746,12 +791,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S*, align 8 @@ -768,15 +812,19 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK9-NEXT: store %struct.S* undef, %struct.S** [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 @@ -793,74 +841,74 @@ // CHECK9: arrayctor.cont: // CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP3]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP18]] to i64 // CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] -// CHECK9-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[ARRAYIDX6]] to i8* -// CHECK9-NEXT: [[TMP15:%.*]] = bitcast %struct.S* [[TMP12]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i64 4, i1 false), !llvm.access.group !9 +// CHECK9-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[ARRAYIDX6]] to i8* +// CHECK9-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[TMP17]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i64 4, i1 false), !llvm.access.group !9 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK9-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP21]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP26]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] @@ -883,23 +931,23 @@ // CHECK9-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) -// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) -// CHECK9-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 8 -// CHECK9-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 8 +// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) +// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) +// CHECK9-NEXT: store %struct.S.1* [[TEST]], %struct.S.1** [[VAR]], align 8 +// CHECK9-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 8 // CHECK9-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 2) // CHECK9-NEXT: [[TMP1:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 1) // CHECK9-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 @@ -909,17 +957,17 @@ // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: // CHECK9-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK9: arraydestroy.done2: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK9-NEXT: ret i32 [[TMP4]] // @@ -959,300 +1007,316 @@ // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK9-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) +// CHECK9-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP2:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK9-NEXT: [[_TMP2:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 8 +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: -// CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !14 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !14 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]]) -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK9-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN4]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN4]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK9: arraydestroy.done5: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 8 +// CHECK9-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: -// CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP3]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 8, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] -// CHECK9-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[ARRAYIDX6]] to i8* -// CHECK9-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i64 4, i1 false), !llvm.access.group !17 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP17:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP3]], align 8, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: [[TMP19:%.*]] = bitcast %struct.S.1* [[ARRAYIDX6]] to i8* +// CHECK9-NEXT: [[TMP20:%.*]] = bitcast %struct.S.1* [[TMP17]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i64 4, i1 false), !llvm.access.group !17 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK9-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN8]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP21]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP26]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK9: arraydestroy.done9: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: store i32 0, i32* [[F]], align 4 // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1339,15 +1403,17 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l95 // CHECK11-SAME: () #[[ATTR3:[0-9]+]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S*, align 4 @@ -1355,6 +1421,8 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 @@ -1362,8 +1430,11 @@ // CHECK11-NEXT: [[_TMP2:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: store %struct.S* undef, %struct.S** [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 @@ -1381,60 +1452,66 @@ // CHECK11: arrayctor.cont: // CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group !6 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP10]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP11]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !6 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN4]], i32 2 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN4]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP15]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] @@ -1444,12 +1521,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S*, align 4 @@ -1466,15 +1542,19 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK11-NEXT: store %struct.S* undef, %struct.S** [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 @@ -1489,72 +1569,72 @@ // CHECK11: arrayctor.cont: // CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* -// CHECK11-NEXT: [[TMP15:%.*]] = bitcast %struct.S* [[TMP12]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i32 4, i1 false), !llvm.access.group !10 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP16]] +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP18]] +// CHECK11-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* +// CHECK11-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[TMP17]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i32 4, i1 false), !llvm.access.group !10 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK11-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK11-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP21]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP26]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -1577,23 +1657,23 @@ // CHECK11-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i32 8, i1 false) -// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i32 1 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) -// CHECK11-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 4 -// CHECK11-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 4 +// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i32 1 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK11-NEXT: store %struct.S.1* [[TEST]], %struct.S.1** [[VAR]], align 4 +// CHECK11-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 4 // CHECK11-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 2) // CHECK11-NEXT: [[TMP1:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 1) // CHECK11-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 @@ -1603,17 +1683,17 @@ // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: // CHECK11-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK11: arraydestroy.done2: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK11-NEXT: ret i32 [[TMP4]] // @@ -1653,294 +1733,310 @@ // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK11-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49 // CHECK11-SAME: () #[[ATTR3]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP2:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 4 +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: -// CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i32 1 -// CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group !15 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP10]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP11]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !15 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN4]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN4]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP15]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK11: arraydestroy.done5: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP2:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 4 +// CHECK11-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: -// CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i32 1 -// CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[ARRAYIDX4]] to i8* -// CHECK11-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i32 4, i1 false), !llvm.access.group !18 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP16]] +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP17:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP2]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 [[TMP18]] +// CHECK11-NEXT: [[TMP19:%.*]] = bitcast %struct.S.1* [[ARRAYIDX4]] to i8* +// CHECK11-NEXT: [[TMP20:%.*]] = bitcast %struct.S.1* [[TMP17]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i32 4, i1 false), !llvm.access.group !18 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK11-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK11-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP21]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP26]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK11: arraydestroy.done7: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: store i32 0, i32* [[F]], align 4 // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp @@ -83,71 +83,84 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 4), !llvm.access.group !6 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !6 +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 4), !llvm.access.group !6 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !6 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !6 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -156,12 +169,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -171,60 +183,64 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -235,71 +251,84 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 3), !llvm.access.group !15 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 3), !llvm.access.group !15 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -308,12 +337,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -323,60 +351,64 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -402,71 +434,84 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l29 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group !21 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !21 +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 2), !llvm.access.group !21 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !21 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -475,12 +520,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -490,60 +534,64 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/distribute_private_codegen.cpp b/clang/test/OpenMP/distribute_private_codegen.cpp --- a/clang/test/OpenMP/distribute_private_codegen.cpp +++ b/clang/test/OpenMP/distribute_private_codegen.cpp @@ -130,15 +130,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK1-SAME: () #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca double*, align 8 @@ -155,66 +157,68 @@ // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store double* undef, double** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: store double* [[G1]], double** [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: store double 1.000000e+00, double* [[G]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load double*, double** [[_TMP2]], align 8 -// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load double*, double** [[_TMP2]], align 8 +// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP9]], align 8 // CHECK1-NEXT: store i32 3, i32* [[SVAR]], align 4 // CHECK1-NEXT: store float 4.000000e+00, float* [[SFVAR]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double* [[G]], double** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP11:%.*]] = load double*, double** [[_TMP2]], align 8 -// CHECK1-NEXT: store double* [[TMP11]], double** [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store i32* [[SVAR]], i32** [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store float* [[SFVAR]], float** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store double* [[G]], double** [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load double*, double** [[_TMP2]], align 8 +// CHECK1-NEXT: store double* [[TMP12]], double** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[SVAR]], i32** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[SFVAR]], float** [[TMP14]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // @@ -241,15 +245,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK3-SAME: () #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca double*, align 4 @@ -266,66 +272,68 @@ // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store double* undef, double** [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: store double* [[G1]], double** [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK3-NEXT: store double 1.000000e+00, double* [[G]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load double*, double** [[_TMP2]], align 4 -// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load double*, double** [[_TMP2]], align 4 +// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP9]], align 4 // CHECK3-NEXT: store i32 3, i32* [[SVAR]], align 4 // CHECK3-NEXT: store float 4.000000e+00, float* [[SFVAR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double* [[G]], double** [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP11:%.*]] = load double*, double** [[_TMP2]], align 4 -// CHECK3-NEXT: store double* [[TMP11]], double** [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store i32* [[SVAR]], i32** [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store float* [[SFVAR]], float** [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G]], double** [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load double*, double** [[_TMP2]], align 4 +// CHECK3-NEXT: store double* [[TMP12]], double** [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SVAR]], i32** [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[SFVAR]], float** [[TMP14]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK3-NEXT: ret void // // @@ -422,15 +430,17 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l93 // CHECK9-SAME: () #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S*, align 8 @@ -447,6 +457,8 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store %struct.S* undef, %struct.S** [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 @@ -464,67 +476,67 @@ // CHECK9: arrayctor.cont: // CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP12]] to i64 // CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM4]] -// CHECK9-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[ARRAYIDX5]] to i8* -// CHECK9-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[TMP10]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[ARRAYIDX5]] to i8* +// CHECK9-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[TMP11]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i64 2 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] @@ -546,15 +558,17 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -565,49 +579,51 @@ // CHECK9-NEXT: [[I1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: ret void // // @@ -615,23 +631,23 @@ // CHECK9-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) -// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) -// CHECK9-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 8 -// CHECK9-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 8 +// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) +// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) +// CHECK9-NEXT: store %struct.S.1* [[TEST]], %struct.S.1** [[VAR]], align 8 +// CHECK9-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 8 // CHECK9-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 2) // CHECK9-NEXT: [[TMP1:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0) // CHECK9-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 @@ -641,17 +657,17 @@ // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: // CHECK9-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK9: arraydestroy.done2: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK9-NEXT: ret i32 [[TMP4]] // @@ -691,182 +707,186 @@ // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK9-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) +// CHECK9-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP2:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK9-NEXT: [[_TMP2:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 8 +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: -// CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM4]] -// CHECK9-NEXT: [[TMP12:%.*]] = bitcast %struct.S.0* [[ARRAYIDX5]] to i8* -// CHECK9-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[TMP10]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 4, i1 false) +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK9-NEXT: [[TMP13:%.*]] = bitcast %struct.S.1* [[ARRAYIDX5]] to i8* +// CHECK9-NEXT: [[TMP14:%.*]] = bitcast %struct.S.1* [[TMP11]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]]) -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN7]], i64 2 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN7]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK9: arraydestroy.done8: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: store i32 0, i32* [[F]], align 4 // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -963,15 +983,17 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l93 // CHECK11-SAME: () #[[ATTR3:[0-9]+]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S*, align 4 @@ -988,6 +1010,8 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: store %struct.S* undef, %struct.S** [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 @@ -1005,65 +1029,65 @@ // CHECK11: arrayctor.cont: // CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP9]] -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* -// CHECK11-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[TMP10]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP10]] +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK11-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* +// CHECK11-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[TMP11]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -1085,15 +1109,17 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102 // CHECK11-SAME: () #[[ATTR3]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1104,49 +1130,51 @@ // CHECK11-NEXT: [[I1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK11-NEXT: ret void // // @@ -1154,23 +1182,23 @@ // CHECK11-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i32 8, i1 false) -// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i32 1 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) -// CHECK11-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 4 -// CHECK11-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 4 +// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i32 1 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK11-NEXT: store %struct.S.1* [[TEST]], %struct.S.1** [[VAR]], align 4 +// CHECK11-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 4 // CHECK11-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 2) // CHECK11-NEXT: [[TMP1:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0) // CHECK11-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 @@ -1180,17 +1208,17 @@ // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: // CHECK11-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK11: arraydestroy.done2: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK11-NEXT: ret i32 [[TMP4]] // @@ -1230,180 +1258,184 @@ // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK11-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49 // CHECK11-SAME: () #[[ATTR3]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP2:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 4 +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: -// CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i32 1 -// CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP9]] -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: [[TMP12:%.*]] = bitcast %struct.S.0* [[ARRAYIDX4]] to i8* -// CHECK11-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[TMP10]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP10]] +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK11-NEXT: [[TMP13:%.*]] = bitcast %struct.S.1* [[ARRAYIDX4]] to i8* +// CHECK11-NEXT: [[TMP14:%.*]] = bitcast %struct.S.1* [[TMP11]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]]) -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK11: arraydestroy.done7: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: store i32 0, i32* [[F]], align 4 // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/distribute_simd_codegen.cpp b/clang/test/OpenMP/distribute_simd_codegen.cpp --- a/clang/test/OpenMP/distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/distribute_simd_codegen.cpp @@ -215,23 +215,29 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK1-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK1-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK1-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined. to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -241,84 +247,86 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK1-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load float*, float** [[TMP0]], align 8 -// CHECK1-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[TMP4]], i64 16) ] +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load float*, float** [[TMP2]], align 8 +// CHECK1-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[TMP9]], i64 16) ] // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 4571423 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 4571423 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 7 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[IDXPROM2]] -// CHECK1-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK1-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[IDXPROM5]] -// CHECK1-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP21]] -// CHECK1-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[IDXPROM8]] +// CHECK1-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP4]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP6]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM2]] +// CHECK1-NEXT: [[TMP23:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP20]], [[TMP23]] +// CHECK1-NEXT: [[TMP24:%.*]] = load float*, float** [[TMP8]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP24]], i64 [[IDXPROM5]] +// CHECK1-NEXT: [[TMP26:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP26]] +// CHECK1-NEXT: [[TMP27:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP27]], i64 [[IDXPROM8]] // CHECK1-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4, !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK1-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 32000001, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -397,23 +405,29 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK1-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK1-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK1-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -423,82 +437,84 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK1-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK1-NEXT: store i32 [[SUB]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP12]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load float*, float** [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP15]], i64 [[IDXPROM2]] -// CHECK1-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX3]], align 4 -// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK1-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM5]] -// CHECK1-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX6]], align 4 -// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK1-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM8]] +// CHECK1-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM2]] +// CHECK1-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK1-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM5]] +// CHECK1-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX6]], align 4 +// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK1-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP26]], i64 [[IDXPROM8]] // CHECK1-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK1-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK1-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 32, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -577,23 +593,29 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK1-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK1-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK1-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..4 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -603,99 +625,101 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK1-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 16908288, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK1-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM3]] -// CHECK1-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK1-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM6]] -// CHECK1-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK1-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM9]] +// CHECK1-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP4]], align 8, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP6]], align 8, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[IDXPROM3]] +// CHECK1-NEXT: [[TMP24:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK1-NEXT: [[TMP25:%.*]] = load float*, float** [[TMP8]], align 8, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP25]], i64 [[IDXPROM6]] +// CHECK1-NEXT: [[TMP27:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP27]] +// CHECK1-NEXT: [[TMP28:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP29]] to i64 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP28]], i64 [[IDXPROM9]] // CHECK1-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !17 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP30]], 1 // CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK1-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK1-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 -2147483522, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -772,121 +796,127 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i64 [[I]], i64* [[I_ADDR]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[I_ADDR]] to i8* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_ADDR]] to i8* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8*, i8*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i8* [[CONV]], i8* [[CONV1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i8* [[CONV]], i8** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i8* [[CONV1]], i8** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[I:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I4:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[I:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I6:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[I5:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i8* [[I]], i8** [[I_ADDR]], align 8 -// CHECK1-NEXT: store i8* [[A]], i8** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i8*, i8** [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8*, i8** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 -// CHECK1-NEXT: store i8 [[TMP2]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 1 +// CHECK1-NEXT: store i8 [[TMP5]], i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 // CHECK1-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] // CHECK1-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: store i8 [[TMP4]], i8* [[I4]], align 1 -// CHECK1-NEXT: [[TMP5:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[CONV5:%.*]] = sext i8 [[TMP5]] to i32 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV5]], 10 +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: store i8 [[TMP7]], i8* [[I]], align 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[CONV4:%.*]] = sext i8 [[TMP8]] to i32 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 10 // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !20 -// CHECK1-NEXT: [[CONV9:%.*]] = sext i8 [[TMP16]] to i32 // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] -// CHECK1-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK1-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP19:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !20 +// CHECK1-NEXT: [[CONV8:%.*]] = sext i8 [[TMP19]] to i32 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]] +// CHECK1-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8 +// CHECK1-NEXT: store i8 [[CONV10]], i8* [[I5]], align 1, !llvm.access.group !20 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK1-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP23:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK1-NEXT: [[TMP24:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[CONV14:%.*]] = sext i8 [[TMP24]] to i32 -// CHECK1-NEXT: [[SUB15:%.*]] = sub i32 10, [[CONV14]] -// CHECK1-NEXT: [[SUB16:%.*]] = sub i32 [[SUB15]], 1 -// CHECK1-NEXT: [[ADD17:%.*]] = add i32 [[SUB16]], 1 -// CHECK1-NEXT: [[DIV18:%.*]] = udiv i32 [[ADD17]], 1 -// CHECK1-NEXT: [[MUL19:%.*]] = mul nsw i32 [[DIV18]], 1 -// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[CONV13]], [[MUL19]] -// CHECK1-NEXT: [[CONV21:%.*]] = trunc i32 [[ADD20]] to i8 -// CHECK1-NEXT: store i8 [[CONV21]], i8* [[TMP0]], align 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[CONV12:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK1-NEXT: [[TMP27:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP27]] to i32 +// CHECK1-NEXT: [[SUB14:%.*]] = sub i32 10, [[CONV13]] +// CHECK1-NEXT: [[SUB15:%.*]] = sub i32 [[SUB14]], 1 +// CHECK1-NEXT: [[ADD16:%.*]] = add i32 [[SUB15]], 1 +// CHECK1-NEXT: [[DIV17:%.*]] = udiv i32 [[ADD16]], 1 +// CHECK1-NEXT: [[MUL18:%.*]] = mul nsw i32 [[DIV17]], 1 +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV12]], [[MUL18]] +// CHECK1-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 +// CHECK1-NEXT: store i8 [[CONV20]], i8* [[TMP2]], align 1 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -940,18 +970,21 @@ // CHECK1-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i16* [[CONV]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i16* [[CONV]], i16** [[TMP0]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -961,73 +994,75 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16*, i16** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[TMP0]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP2]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1113,23 +1148,29 @@ // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca float*, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca float*, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store float* [[A]], float** [[A_ADDR]], align 4 // CHECK3-NEXT: store float* [[B]], float** [[B_ADDR]], align 4 // CHECK3-NEXT: store float* [[C]], float** [[C_ADDR]], align 4 // CHECK3-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined. to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[C:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca float**, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca float**, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca float**, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca float**, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1139,80 +1180,82 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store float** [[A]], float*** [[A_ADDR]], align 4 -// CHECK3-NEXT: store float** [[B]], float*** [[B_ADDR]], align 4 -// CHECK3-NEXT: store float** [[C]], float*** [[C_ADDR]], align 4 -// CHECK3-NEXT: store float** [[D]], float*** [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load float*, float** [[TMP0]], align 4 -// CHECK3-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[TMP4]], i32 16) ] +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load float*, float** [[TMP2]], align 4 +// CHECK3-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[TMP9]], i32 16) ] // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 4571423 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 4571423 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 7 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP13]], i32 [[TMP14]] -// CHECK3-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP16]], i32 [[TMP17]] -// CHECK3-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[MUL3:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK3-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP19]], i32 [[TMP20]] -// CHECK3-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP22]], i32 [[TMP23]] +// CHECK3-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP4]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP18]], i32 [[TMP19]] +// CHECK3-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP6]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP21]], i32 [[TMP22]] +// CHECK3-NEXT: [[TMP23:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[MUL3:%.*]] = fmul float [[TMP20]], [[TMP23]] +// CHECK3-NEXT: [[TMP24:%.*]] = load float*, float** [[TMP8]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP24]], i32 [[TMP25]] +// CHECK3-NEXT: [[TMP26:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP26]] +// CHECK3-NEXT: [[TMP27:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP27]], i32 [[TMP28]] // CHECK3-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK3-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 32000001, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1291,23 +1334,29 @@ // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca float*, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca float*, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store float* [[A]], float** [[A_ADDR]], align 4 // CHECK3-NEXT: store float* [[B]], float** [[B_ADDR]], align 4 // CHECK3-NEXT: store float* [[C]], float** [[C_ADDR]], align 4 // CHECK3-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[C:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca float**, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca float**, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca float**, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca float**, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1317,78 +1366,80 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store float** [[A]], float*** [[A_ADDR]], align 4 -// CHECK3-NEXT: store float** [[B]], float*** [[B_ADDR]], align 4 -// CHECK3-NEXT: store float** [[C]], float*** [[C_ADDR]], align 4 -// CHECK3-NEXT: store float** [[D]], float*** [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK3-NEXT: store i32 [[SUB]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load float*, float** [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP15]], i32 [[TMP16]] -// CHECK3-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX2]], align 4 -// CHECK3-NEXT: [[MUL3:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK3-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP18]], i32 [[TMP19]] -// CHECK3-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX4]], align 4 -// CHECK3-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP20]] -// CHECK3-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP21]], i32 [[TMP22]] +// CHECK3-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP17]], i32 [[TMP18]] +// CHECK3-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 [[TMP21]] +// CHECK3-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX2]], align 4 +// CHECK3-NEXT: [[MUL3:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK3-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP23]], i32 [[TMP24]] +// CHECK3-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX4]], align 4 +// CHECK3-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP25]] +// CHECK3-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP26]], i32 [[TMP27]] // CHECK3-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK3-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK3-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 32, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1467,23 +1518,29 @@ // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca float*, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca float*, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store float* [[A]], float** [[A_ADDR]], align 4 // CHECK3-NEXT: store float* [[B]], float** [[B_ADDR]], align 4 // CHECK3-NEXT: store float* [[C]], float** [[C_ADDR]], align 4 // CHECK3-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..4 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[C:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca float**, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca float**, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca float**, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca float**, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1493,95 +1550,97 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store float** [[A]], float*** [[A_ADDR]], align 4 -// CHECK3-NEXT: store float** [[B]], float*** [[B_ADDR]], align 4 -// CHECK3-NEXT: store float** [[C]], float*** [[C_ADDR]], align 4 -// CHECK3-NEXT: store float** [[D]], float*** [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 16908288, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK3-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i32 [[TMP15]] -// CHECK3-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP17]], i32 [[TMP18]] -// CHECK3-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK3-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] -// CHECK3-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP23]], i32 [[TMP24]] +// CHECK3-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP4]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP19]], i32 [[TMP20]] +// CHECK3-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP6]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP22]], i32 [[TMP23]] +// CHECK3-NEXT: [[TMP24:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[MUL4:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK3-NEXT: [[TMP25:%.*]] = load float*, float** [[TMP8]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP25]], i32 [[TMP26]] +// CHECK3-NEXT: [[TMP27:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP27]] +// CHECK3-NEXT: [[TMP28:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP28]], i32 [[TMP29]] // CHECK3-NEXT: store float [[MUL6]], float* [[ARRAYIDX7]], align 4, !llvm.access.group !18 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[ADD8:%.*]] = add i32 [[TMP25]], 1 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[ADD8:%.*]] = add i32 [[TMP30]], 1 // CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD10:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD10:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK3-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK3-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 -2147483522, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1658,121 +1717,127 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i32 [[I]], i32* [[I_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[I_ADDR]] to i8* // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[A_ADDR]] to i8* -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8*, i8*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i8* [[CONV]], i8* [[CONV1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i8* [[CONV]], i8** [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i8* [[CONV1]], i8** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[I:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[I_ADDR:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I4:%.*]] = alloca i8, align 1 +// CHECK3-NEXT: [[I:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I6:%.*]] = alloca i8, align 1 +// CHECK3-NEXT: [[I5:%.*]] = alloca i8, align 1 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i8* [[I]], i8** [[I_ADDR]], align 4 -// CHECK3-NEXT: store i8* [[A]], i8** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i8*, i8** [[I_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i8*, i8** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 -// CHECK3-NEXT: store i8 [[TMP2]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[TMP3:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[CONV:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8*, i8** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i8*, i8** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 1 +// CHECK3-NEXT: store i8 [[TMP5]], i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 // CHECK3-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] // CHECK3-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK3-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: store i8 [[TMP4]], i8* [[I4]], align 1 -// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[CONV5:%.*]] = sext i8 [[TMP5]] to i32 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV5]], 10 +// CHECK3-NEXT: [[TMP7:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: store i8 [[TMP7]], i8* [[I]], align 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[CONV4:%.*]] = sext i8 [[TMP8]] to i32 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 10 // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !21 -// CHECK3-NEXT: [[CONV9:%.*]] = sext i8 [[TMP16]] to i32 // CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] -// CHECK3-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK3-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1, !llvm.access.group !21 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP19:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !21 +// CHECK3-NEXT: [[CONV8:%.*]] = sext i8 [[TMP19]] to i32 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]] +// CHECK3-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8 +// CHECK3-NEXT: store i8 [[CONV10]], i8* [[I5]], align 1, !llvm.access.group !21 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK3-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK3-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK3-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP23:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[CONV13:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK3-NEXT: [[TMP24:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[CONV14:%.*]] = sext i8 [[TMP24]] to i32 -// CHECK3-NEXT: [[SUB15:%.*]] = sub i32 10, [[CONV14]] -// CHECK3-NEXT: [[SUB16:%.*]] = sub i32 [[SUB15]], 1 -// CHECK3-NEXT: [[ADD17:%.*]] = add i32 [[SUB16]], 1 -// CHECK3-NEXT: [[DIV18:%.*]] = udiv i32 [[ADD17]], 1 -// CHECK3-NEXT: [[MUL19:%.*]] = mul nsw i32 [[DIV18]], 1 -// CHECK3-NEXT: [[ADD20:%.*]] = add nsw i32 [[CONV13]], [[MUL19]] -// CHECK3-NEXT: [[CONV21:%.*]] = trunc i32 [[ADD20]] to i8 -// CHECK3-NEXT: store i8 [[CONV21]], i8* [[TMP0]], align 1 +// CHECK3-NEXT: [[TMP26:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[CONV12:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK3-NEXT: [[TMP27:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[CONV13:%.*]] = sext i8 [[TMP27]] to i32 +// CHECK3-NEXT: [[SUB14:%.*]] = sub i32 10, [[CONV13]] +// CHECK3-NEXT: [[SUB15:%.*]] = sub i32 [[SUB14]], 1 +// CHECK3-NEXT: [[ADD16:%.*]] = add i32 [[SUB15]], 1 +// CHECK3-NEXT: [[DIV17:%.*]] = udiv i32 [[ADD16]], 1 +// CHECK3-NEXT: [[MUL18:%.*]] = mul nsw i32 [[DIV17]], 1 +// CHECK3-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV12]], [[MUL18]] +// CHECK3-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 +// CHECK3-NEXT: store i8 [[CONV20]], i8* [[TMP2]], align 1 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: br label [[OMP_PRECOND_END]] @@ -1826,18 +1891,21 @@ // CHECK3-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i16* [[CONV]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i16* [[CONV]], i16** [[TMP0]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1847,73 +1915,75 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16*, i16** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[TMP0]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP2]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1999,23 +2069,29 @@ // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK5-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK5-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK5-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK5-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined. to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2025,84 +2101,86 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK5-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK5-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK5-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load float*, float** [[TMP0]], align 8 -// CHECK5-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[TMP4]], i64 16) ] +// CHECK5-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = load float*, float** [[TMP2]], align 8 +// CHECK5-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[TMP9]], i64 16) ] // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 4571423 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 4571423 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 7 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 [[IDXPROM]] -// CHECK5-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK5-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[IDXPROM2]] -// CHECK5-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[MUL4:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK5-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[IDXPROM5]] -// CHECK5-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP21]] -// CHECK5-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[IDXPROM8]] +// CHECK5-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP4]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM]] +// CHECK5-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP6]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK5-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM2]] +// CHECK5-NEXT: [[TMP23:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[MUL4:%.*]] = fmul float [[TMP20]], [[TMP23]] +// CHECK5-NEXT: [[TMP24:%.*]] = load float*, float** [[TMP8]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP24]], i64 [[IDXPROM5]] +// CHECK5-NEXT: [[TMP26:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP26]] +// CHECK5-NEXT: [[TMP27:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP27]], i64 [[IDXPROM8]] // CHECK5-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4, !llvm.access.group !8 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK5-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK5-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK5-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 32000001, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2181,23 +2259,29 @@ // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK5-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK5-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK5-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2207,82 +2291,84 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK5-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK5-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK5-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK5-NEXT: store i32 [[SUB]], i32* [[I]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 8, !nontemporal !15 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP12]], i64 [[IDXPROM]] -// CHECK5-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load float*, float** [[TMP2]], align 8 -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK5-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP15]], i64 [[IDXPROM2]] -// CHECK5-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX3]], align 4 -// CHECK5-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK5-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP3]], align 8 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM5]] -// CHECK5-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX6]], align 4 -// CHECK5-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK5-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 8, !nontemporal !15 -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM8]] +// CHECK5-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP4]], align 8, !nontemporal !15 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM]] +// CHECK5-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP6]], align 8 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK5-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM2]] +// CHECK5-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK5-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK5-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP8]], align 8 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM5]] +// CHECK5-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX6]], align 4 +// CHECK5-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK5-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP2]], align 8, !nontemporal !15 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP26]], i64 [[IDXPROM8]] // CHECK5-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK5-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK5-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 32, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2361,23 +2447,29 @@ // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK5-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK5-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK5-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK5-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..4 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2387,99 +2479,101 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK5-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK5-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK5-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 16908288, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK5-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !18 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM]] -// CHECK5-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !18 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM3]] -// CHECK5-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK5-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !18 -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM6]] -// CHECK5-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK5-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !18 -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM9]] +// CHECK5-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP4]], align 8, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[IDXPROM]] +// CHECK5-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP6]], align 8, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[IDXPROM3]] +// CHECK5-NEXT: [[TMP24:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK5-NEXT: [[TMP25:%.*]] = load float*, float** [[TMP8]], align 8, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP25]], i64 [[IDXPROM6]] +// CHECK5-NEXT: [[TMP27:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP27]] +// CHECK5-NEXT: [[TMP28:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP29]] to i64 +// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP28]], i64 [[IDXPROM9]] // CHECK5-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !18 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[ADD11:%.*]] = add i32 [[TMP30]], 1 // CHECK5-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK5-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD12:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK5-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD13:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK5-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD13:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK5-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK5-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK5-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK5-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK5-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 -2147483522, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2556,152 +2650,158 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK5-NEXT: store i64 [[I]], i64* [[I_ADDR]], align 8 // CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[I_ADDR]] to i8* // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_ADDR]] to i8* -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8*, i8*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i8* [[CONV]], i8* [[CONV1]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i8* [[CONV]], i8** [[TMP0]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i8* [[CONV1]], i8** [[TMP1]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[I:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[I_ADDR:%.*]] = alloca i8*, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i8*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[I4:%.*]] = alloca i8, align 1 +// CHECK5-NEXT: [[I:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[I6:%.*]] = alloca i8, align 1 +// CHECK5-NEXT: [[I5:%.*]] = alloca i8, align 1 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i8* [[I]], i8** [[I_ADDR]], align 8 -// CHECK5-NEXT: store i8* [[A]], i8** [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i8*, i8** [[I_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i8*, i8** [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 -// CHECK5-NEXT: store i8 [[TMP2]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[TMP3:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[CONV:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK5-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i8*, i8** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i8*, i8** [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 1 +// CHECK5-NEXT: store i8 [[TMP5]], i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: [[TMP6:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 // CHECK5-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] // CHECK5-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK5-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK5-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK5-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: store i8 [[TMP4]], i8* [[I4]], align 1 -// CHECK5-NEXT: [[TMP5:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[CONV5:%.*]] = sext i8 [[TMP5]] to i32 -// CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV5]], 10 +// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: store i8 [[TMP7]], i8* [[I]], align 1 +// CHECK5-NEXT: [[TMP8:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: [[CONV4:%.*]] = sext i8 [[TMP8]] to i32 +// CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 10 // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK5: omp.precond.then: // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK5-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK5-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i8, i8* [[TMP1]], align 1 -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP14]], 0 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i8, i8* [[TMP4]], align 1 +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP17]], 0 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK5: omp_if.then: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 -// CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP17:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !21 -// CHECK5-NEXT: [[CONV9:%.*]] = sext i8 [[TMP17]] to i32 // CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] -// CHECK5-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK5-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1, !nontemporal !15, !llvm.access.group !21 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK5-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5: omp.inner.for.body: +// CHECK5-NEXT: [[TMP20:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !21 +// CHECK5-NEXT: [[CONV8:%.*]] = sext i8 [[TMP20]] to i32 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]] +// CHECK5-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8 +// CHECK5-NEXT: store i8 [[CONV10]], i8* [[I5]], align 1, !nontemporal !15, !llvm.access.group !21 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK5-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK5-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK5-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_IF_END:%.*]] // CHECK5: omp_if.else: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND13:%.*]] -// CHECK5: omp.inner.for.cond13: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP14:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK5-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY15:%.*]], label [[OMP_INNER_FOR_END23:%.*]] -// CHECK5: omp.inner.for.body15: -// CHECK5-NEXT: [[TMP22:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[CONV16:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND12:%.*]] +// CHECK5: omp.inner.for.cond12: // CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP23]], 1 -// CHECK5-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV16]], [[MUL17]] -// CHECK5-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK5-NEXT: store i8 [[CONV19]], i8* [[I6]], align 1 -// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE20:%.*]] -// CHECK5: omp.body.continue20: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC21:%.*]] -// CHECK5: omp.inner.for.inc21: -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK5-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP24:![0-9]+]] -// CHECK5: omp.inner.for.end23: +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP13:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK5-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY14:%.*]], label [[OMP_INNER_FOR_END22:%.*]] +// CHECK5: omp.inner.for.body14: +// CHECK5-NEXT: [[TMP25:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: [[CONV15:%.*]] = sext i8 [[TMP25]] to i32 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL16:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK5-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV15]], [[MUL16]] +// CHECK5-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK5-NEXT: store i8 [[CONV18]], i8* [[I5]], align 1 +// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE19:%.*]] +// CHECK5: omp.body.continue19: +// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC20:%.*]] +// CHECK5: omp.inner.for.inc20: +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK5-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND12]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK5: omp.inner.for.end22: // CHECK5-NEXT: br label [[OMP_IF_END]] // CHECK5: omp_if.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK5-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP29]]) +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK5-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: -// CHECK5-NEXT: [[TMP29:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[CONV24:%.*]] = sext i8 [[TMP29]] to i32 -// CHECK5-NEXT: [[TMP30:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[CONV25:%.*]] = sext i8 [[TMP30]] to i32 -// CHECK5-NEXT: [[SUB26:%.*]] = sub i32 10, [[CONV25]] -// CHECK5-NEXT: [[SUB27:%.*]] = sub i32 [[SUB26]], 1 -// CHECK5-NEXT: [[ADD28:%.*]] = add i32 [[SUB27]], 1 -// CHECK5-NEXT: [[DIV29:%.*]] = udiv i32 [[ADD28]], 1 -// CHECK5-NEXT: [[MUL30:%.*]] = mul nsw i32 [[DIV29]], 1 -// CHECK5-NEXT: [[ADD31:%.*]] = add nsw i32 [[CONV24]], [[MUL30]] -// CHECK5-NEXT: [[CONV32:%.*]] = trunc i32 [[ADD31]] to i8 -// CHECK5-NEXT: store i8 [[CONV32]], i8* [[TMP0]], align 1 +// CHECK5-NEXT: [[TMP32:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: [[CONV23:%.*]] = sext i8 [[TMP32]] to i32 +// CHECK5-NEXT: [[TMP33:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: [[CONV24:%.*]] = sext i8 [[TMP33]] to i32 +// CHECK5-NEXT: [[SUB25:%.*]] = sub i32 10, [[CONV24]] +// CHECK5-NEXT: [[SUB26:%.*]] = sub i32 [[SUB25]], 1 +// CHECK5-NEXT: [[ADD27:%.*]] = add i32 [[SUB26]], 1 +// CHECK5-NEXT: [[DIV28:%.*]] = udiv i32 [[ADD27]], 1 +// CHECK5-NEXT: [[MUL29:%.*]] = mul nsw i32 [[DIV28]], 1 +// CHECK5-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV23]], [[MUL29]] +// CHECK5-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 +// CHECK5-NEXT: store i8 [[CONV31]], i8* [[TMP2]], align 1 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: br label [[OMP_PRECOND_END]] @@ -2755,18 +2855,21 @@ // CHECK5-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK5-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i16* [[CONV]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i16* [[CONV]], i16** [[TMP0]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2776,73 +2879,75 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i16*, i16** [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i16, i16* [[TMP0]], align 2 -// CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK5-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP2]], align 2 +// CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !26 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK5-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK5-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK5-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK5-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK5-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2928,23 +3033,29 @@ // CHECK7-NEXT: [[B_ADDR:%.*]] = alloca float*, align 4 // CHECK7-NEXT: [[C_ADDR:%.*]] = alloca float*, align 4 // CHECK7-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK7-NEXT: store float* [[A]], float** [[A_ADDR]], align 4 // CHECK7-NEXT: store float* [[B]], float** [[B_ADDR]], align 4 // CHECK7-NEXT: store float* [[C]], float** [[C_ADDR]], align 4 // CHECK7-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined. to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK7-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[C:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca float**, align 4 -// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca float**, align 4 -// CHECK7-NEXT: [[C_ADDR:%.*]] = alloca float**, align 4 -// CHECK7-NEXT: [[D_ADDR:%.*]] = alloca float**, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2954,80 +3065,82 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store float** [[A]], float*** [[A_ADDR]], align 4 -// CHECK7-NEXT: store float** [[B]], float*** [[B_ADDR]], align 4 -// CHECK7-NEXT: store float** [[C]], float*** [[C_ADDR]], align 4 -// CHECK7-NEXT: store float** [[D]], float*** [[D_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load float*, float** [[TMP0]], align 4 -// CHECK7-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[TMP4]], i32 16) ] +// CHECK7-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load float*, float** [[TMP2]], align 4 +// CHECK7-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[TMP9]], i32 16) ] // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 4571423 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 4571423 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 7 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP13]], i32 [[TMP14]] -// CHECK7-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP16]], i32 [[TMP17]] -// CHECK7-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[MUL3:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK7-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP19]], i32 [[TMP20]] -// CHECK7-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP21]] -// CHECK7-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP22]], i32 [[TMP23]] +// CHECK7-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP4]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP18]], i32 [[TMP19]] +// CHECK7-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP6]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP21]], i32 [[TMP22]] +// CHECK7-NEXT: [[TMP23:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[MUL3:%.*]] = fmul float [[TMP20]], [[TMP23]] +// CHECK7-NEXT: [[TMP24:%.*]] = load float*, float** [[TMP8]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP24]], i32 [[TMP25]] +// CHECK7-NEXT: [[TMP26:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP26]] +// CHECK7-NEXT: [[TMP27:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[TMP28:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP27]], i32 [[TMP28]] // CHECK7-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !9 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK7-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) -// CHECK7-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK7-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) +// CHECK7-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK7-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 32000001, i32* [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3106,23 +3219,29 @@ // CHECK7-NEXT: [[B_ADDR:%.*]] = alloca float*, align 4 // CHECK7-NEXT: [[C_ADDR:%.*]] = alloca float*, align 4 // CHECK7-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK7-NEXT: store float* [[A]], float** [[A_ADDR]], align 4 // CHECK7-NEXT: store float* [[B]], float** [[B_ADDR]], align 4 // CHECK7-NEXT: store float* [[C]], float** [[C_ADDR]], align 4 // CHECK7-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK7-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[C:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca float**, align 4 -// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca float**, align 4 -// CHECK7-NEXT: [[C_ADDR:%.*]] = alloca float**, align 4 -// CHECK7-NEXT: [[D_ADDR:%.*]] = alloca float**, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3132,78 +3251,80 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store float** [[A]], float*** [[A_ADDR]], align 4 -// CHECK7-NEXT: store float** [[B]], float*** [[B_ADDR]], align 4 -// CHECK7-NEXT: store float** [[C]], float*** [[C_ADDR]], align 4 -// CHECK7-NEXT: store float** [[D]], float*** [[D_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK7-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK7-NEXT: store i32 [[SUB]], i32* [[I]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 4, !nontemporal !16 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 [[TMP13]] -// CHECK7-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = load float*, float** [[TMP2]], align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP15]], i32 [[TMP16]] -// CHECK7-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX2]], align 4 -// CHECK7-NEXT: [[MUL3:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK7-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP3]], align 4 -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP18]], i32 [[TMP19]] -// CHECK7-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX4]], align 4 -// CHECK7-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP20]] -// CHECK7-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 4, !nontemporal !16 -// CHECK7-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP21]], i32 [[TMP22]] +// CHECK7-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP4]], align 4, !nontemporal !16 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP17]], i32 [[TMP18]] +// CHECK7-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP6]], align 4 +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 [[TMP21]] +// CHECK7-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX2]], align 4 +// CHECK7-NEXT: [[MUL3:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK7-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP8]], align 4 +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP23]], i32 [[TMP24]] +// CHECK7-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX4]], align 4 +// CHECK7-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP25]] +// CHECK7-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP2]], align 4, !nontemporal !16 +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP26]], i32 [[TMP27]] // CHECK7-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK7-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK7-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK7-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 32, i32* [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3282,23 +3403,29 @@ // CHECK7-NEXT: [[B_ADDR:%.*]] = alloca float*, align 4 // CHECK7-NEXT: [[C_ADDR:%.*]] = alloca float*, align 4 // CHECK7-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK7-NEXT: store float* [[A]], float** [[A_ADDR]], align 4 // CHECK7-NEXT: store float* [[B]], float** [[B_ADDR]], align 4 // CHECK7-NEXT: store float* [[C]], float** [[C_ADDR]], align 4 // CHECK7-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..4 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK7-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[C:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca float**, align 4 -// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca float**, align 4 -// CHECK7-NEXT: [[C_ADDR:%.*]] = alloca float**, align 4 -// CHECK7-NEXT: [[D_ADDR:%.*]] = alloca float**, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3308,95 +3435,97 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store float** [[A]], float*** [[A_ADDR]], align 4 -// CHECK7-NEXT: store float** [[B]], float*** [[B_ADDR]], align 4 -// CHECK7-NEXT: store float** [[C]], float*** [[C_ADDR]], align 4 -// CHECK7-NEXT: store float** [[D]], float*** [[D_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 16908288, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK7-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 -// CHECK7-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 +// CHECK7-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK7-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK7-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK7-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK7-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 -// CHECK7-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !19 -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i32 [[TMP15]] -// CHECK7-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !19 -// CHECK7-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !19 -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 -// CHECK7-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP17]], i32 [[TMP18]] -// CHECK7-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !19 -// CHECK7-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK7-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !19 -// CHECK7-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 -// CHECK7-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 [[TMP21]] -// CHECK7-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !19 -// CHECK7-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] -// CHECK7-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !19 -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 -// CHECK7-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP23]], i32 [[TMP24]] +// CHECK7-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP4]], align 4, !llvm.access.group !19 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP19]], i32 [[TMP20]] +// CHECK7-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !19 +// CHECK7-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP6]], align 4, !llvm.access.group !19 +// CHECK7-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 +// CHECK7-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP22]], i32 [[TMP23]] +// CHECK7-NEXT: [[TMP24:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !19 +// CHECK7-NEXT: [[MUL4:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK7-NEXT: [[TMP25:%.*]] = load float*, float** [[TMP8]], align 4, !llvm.access.group !19 +// CHECK7-NEXT: [[TMP26:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 +// CHECK7-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP25]], i32 [[TMP26]] +// CHECK7-NEXT: [[TMP27:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !19 +// CHECK7-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP27]] +// CHECK7-NEXT: [[TMP28:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !19 +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 +// CHECK7-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP28]], i32 [[TMP29]] // CHECK7-NEXT: store float [[MUL6]], float* [[ARRAYIDX7]], align 4, !llvm.access.group !19 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK7-NEXT: [[ADD8:%.*]] = add i32 [[TMP25]], 1 +// CHECK7-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK7-NEXT: [[ADD8:%.*]] = add i32 [[TMP30]], 1 // CHECK7-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: -// CHECK7-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD9:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK7-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD9:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK7-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD10:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK7-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD10:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK7-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK7: omp.dispatch.end: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK7-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK7-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK7-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK7-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 -2147483522, i32* [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3473,152 +3602,158 @@ // CHECK7-NEXT: entry: // CHECK7-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK7-NEXT: store i32 [[I]], i32* [[I_ADDR]], align 4 // CHECK7-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK7-NEXT: [[CONV:%.*]] = bitcast i32* [[I_ADDR]] to i8* // CHECK7-NEXT: [[CONV1:%.*]] = bitcast i32* [[A_ADDR]] to i8* -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8*, i8*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i8* [[CONV]], i8* [[CONV1]]) +// CHECK7-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store i8* [[CONV]], i8** [[TMP0]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store i8* [[CONV1]], i8** [[TMP1]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[I:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[I_ADDR:%.*]] = alloca i8*, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i8*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK7-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK7-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[I4:%.*]] = alloca i8, align 1 +// CHECK7-NEXT: [[I:%.*]] = alloca i8, align 1 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[I6:%.*]] = alloca i8, align 1 +// CHECK7-NEXT: [[I5:%.*]] = alloca i8, align 1 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i8* [[I]], i8** [[I_ADDR]], align 4 -// CHECK7-NEXT: store i8* [[A]], i8** [[A_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load i8*, i8** [[I_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i8*, i8** [[A_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 -// CHECK7-NEXT: store i8 [[TMP2]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK7-NEXT: [[TMP3:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK7-NEXT: [[CONV:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK7-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i8*, i8** [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i8*, i8** [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 1 +// CHECK7-NEXT: store i8 [[TMP5]], i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK7-NEXT: [[TMP6:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK7-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 // CHECK7-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] // CHECK7-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK7-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK7-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK7-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK7-NEXT: store i8 [[TMP4]], i8* [[I4]], align 1 -// CHECK7-NEXT: [[TMP5:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK7-NEXT: [[CONV5:%.*]] = sext i8 [[TMP5]] to i32 -// CHECK7-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV5]], 10 +// CHECK7-NEXT: [[TMP7:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK7-NEXT: store i8 [[TMP7]], i8* [[I]], align 1 +// CHECK7-NEXT: [[TMP8:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK7-NEXT: [[CONV4:%.*]] = sext i8 [[TMP8]] to i32 +// CHECK7-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 10 // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK7: omp.precond.then: // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK7-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK7-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK7-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK7-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK7-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK7-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK7-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i8, i8* [[TMP1]], align 1 -// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP14]], 0 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = load i8, i8* [[TMP4]], align 1 +// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP17]], 0 // CHECK7-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK7: omp_if.then: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 -// CHECK7-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP17:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !22 -// CHECK7-NEXT: [[CONV9:%.*]] = sext i8 [[TMP17]] to i32 // CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK7-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] -// CHECK7-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK7-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1, !nontemporal !16, !llvm.access.group !22 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK7-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK7: omp.inner.for.body: +// CHECK7-NEXT: [[TMP20:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !22 +// CHECK7-NEXT: [[CONV8:%.*]] = sext i8 [[TMP20]] to i32 +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK7-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]] +// CHECK7-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8 +// CHECK7-NEXT: store i8 [[CONV10]], i8* [[I5]], align 1, !nontemporal !16, !llvm.access.group !22 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK7-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK7-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK7-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_IF_END:%.*]] // CHECK7: omp_if.else: -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND13:%.*]] -// CHECK7: omp.inner.for.cond13: -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP14:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK7-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY15:%.*]], label [[OMP_INNER_FOR_END23:%.*]] -// CHECK7: omp.inner.for.body15: -// CHECK7-NEXT: [[TMP22:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK7-NEXT: [[CONV16:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND12:%.*]] +// CHECK7: omp.inner.for.cond12: // CHECK7-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP23]], 1 -// CHECK7-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV16]], [[MUL17]] -// CHECK7-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK7-NEXT: store i8 [[CONV19]], i8* [[I6]], align 1 -// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE20:%.*]] -// CHECK7: omp.body.continue20: -// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC21:%.*]] -// CHECK7: omp.inner.for.inc21: -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK7-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP25:![0-9]+]] -// CHECK7: omp.inner.for.end23: +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP13:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK7-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY14:%.*]], label [[OMP_INNER_FOR_END22:%.*]] +// CHECK7: omp.inner.for.body14: +// CHECK7-NEXT: [[TMP25:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK7-NEXT: [[CONV15:%.*]] = sext i8 [[TMP25]] to i32 +// CHECK7-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL16:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK7-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV15]], [[MUL16]] +// CHECK7-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK7-NEXT: store i8 [[CONV18]], i8* [[I5]], align 1 +// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE19:%.*]] +// CHECK7: omp.body.continue19: +// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC20:%.*]] +// CHECK7: omp.inner.for.inc20: +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK7-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND12]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK7: omp.inner.for.end22: // CHECK7-NEXT: br label [[OMP_IF_END]] // CHECK7: omp_if.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK7-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK7-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP29]]) +// CHECK7-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK7-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: -// CHECK7-NEXT: [[TMP29:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK7-NEXT: [[CONV24:%.*]] = sext i8 [[TMP29]] to i32 -// CHECK7-NEXT: [[TMP30:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK7-NEXT: [[CONV25:%.*]] = sext i8 [[TMP30]] to i32 -// CHECK7-NEXT: [[SUB26:%.*]] = sub i32 10, [[CONV25]] -// CHECK7-NEXT: [[SUB27:%.*]] = sub i32 [[SUB26]], 1 -// CHECK7-NEXT: [[ADD28:%.*]] = add i32 [[SUB27]], 1 -// CHECK7-NEXT: [[DIV29:%.*]] = udiv i32 [[ADD28]], 1 -// CHECK7-NEXT: [[MUL30:%.*]] = mul nsw i32 [[DIV29]], 1 -// CHECK7-NEXT: [[ADD31:%.*]] = add nsw i32 [[CONV24]], [[MUL30]] -// CHECK7-NEXT: [[CONV32:%.*]] = trunc i32 [[ADD31]] to i8 -// CHECK7-NEXT: store i8 [[CONV32]], i8* [[TMP0]], align 1 +// CHECK7-NEXT: [[TMP32:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK7-NEXT: [[CONV23:%.*]] = sext i8 [[TMP32]] to i32 +// CHECK7-NEXT: [[TMP33:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK7-NEXT: [[CONV24:%.*]] = sext i8 [[TMP33]] to i32 +// CHECK7-NEXT: [[SUB25:%.*]] = sub i32 10, [[CONV24]] +// CHECK7-NEXT: [[SUB26:%.*]] = sub i32 [[SUB25]], 1 +// CHECK7-NEXT: [[ADD27:%.*]] = add i32 [[SUB26]], 1 +// CHECK7-NEXT: [[DIV28:%.*]] = udiv i32 [[ADD27]], 1 +// CHECK7-NEXT: [[MUL29:%.*]] = mul nsw i32 [[DIV28]], 1 +// CHECK7-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV23]], [[MUL29]] +// CHECK7-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 +// CHECK7-NEXT: store i8 [[CONV31]], i8* [[TMP2]], align 1 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK7: .omp.final.done: // CHECK7-NEXT: br label [[OMP_PRECOND_END]] @@ -3672,18 +3807,21 @@ // CHECK7-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK7-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK7-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i16* [[CONV]]) +// CHECK7-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store i16* [[CONV]], i16** [[TMP0]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3693,73 +3831,75 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i16*, i16** [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i16, i16* [[TMP0]], align 2 -// CHECK7-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK7-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP2]], align 2 +// CHECK7-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 -// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK7-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK7-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK7-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK7-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK7-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK7: omp.dispatch.end: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK7-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK7-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 100, i32* [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5119,23 +5259,29 @@ // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK17-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK17-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK17-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK17-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK17-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined. to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK17-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5145,84 +5291,86 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK17-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK17-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK17-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load float*, float** [[TMP0]], align 8 -// CHECK17-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[TMP4]], i64 16) ] +// CHECK17-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = load float*, float** [[TMP2]], align 8 +// CHECK17-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[TMP9]], i64 16) ] // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 4571423 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 4571423 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 7 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !9 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 [[IDXPROM]] -// CHECK17-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !9 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[IDXPROM2]] -// CHECK17-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[MUL4:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK17-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !9 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK17-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[IDXPROM5]] -// CHECK17-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP21]] -// CHECK17-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !9 -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK17-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[IDXPROM8]] +// CHECK17-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP4]], align 8, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP6]], align 8, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK17-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM2]] +// CHECK17-NEXT: [[TMP23:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[MUL4:%.*]] = fmul float [[TMP20]], [[TMP23]] +// CHECK17-NEXT: [[TMP24:%.*]] = load float*, float** [[TMP8]], align 8, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK17-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP24]], i64 [[IDXPROM5]] +// CHECK17-NEXT: [[TMP26:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP26]] +// CHECK17-NEXT: [[TMP27:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK17-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP27]], i64 [[IDXPROM8]] // CHECK17-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4, !llvm.access.group !9 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK17-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK17-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK17-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 32000001, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5237,23 +5385,29 @@ // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK17-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK17-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK17-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK17-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK17-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK17-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5263,82 +5417,84 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK17-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK17-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK17-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK17-NEXT: store i32 [[SUB]], i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 8 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP12]], i64 [[IDXPROM]] -// CHECK17-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load float*, float** [[TMP2]], align 8 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP15]], i64 [[IDXPROM2]] -// CHECK17-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX3]], align 4 -// CHECK17-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK17-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP3]], align 8 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK17-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM5]] -// CHECK17-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX6]], align 4 -// CHECK17-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK17-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 8 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK17-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM8]] +// CHECK17-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP6]], align 8 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK17-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM2]] +// CHECK17-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK17-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK17-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK17-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM5]] +// CHECK17-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX6]], align 4 +// CHECK17-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK17-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK17-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP26]], i64 [[IDXPROM8]] // CHECK17-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK17-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK17-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 32, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5353,23 +5509,29 @@ // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK17-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK17-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK17-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK17-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK17-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK17-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..2 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK17-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5379,99 +5541,101 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK17-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK17-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK17-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 16908288, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK17-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK17-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !18 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM]] -// CHECK17-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !18 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM3]] -// CHECK17-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK17-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !18 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK17-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM6]] -// CHECK17-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK17-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !18 -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK17-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM9]] +// CHECK17-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP4]], align 8, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP6]], align 8, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK17-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[IDXPROM3]] +// CHECK17-NEXT: [[TMP24:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[MUL5:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK17-NEXT: [[TMP25:%.*]] = load float*, float** [[TMP8]], align 8, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK17-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP25]], i64 [[IDXPROM6]] +// CHECK17-NEXT: [[TMP27:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP27]] +// CHECK17-NEXT: [[TMP28:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP29]] to i64 +// CHECK17-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP28]], i64 [[IDXPROM9]] // CHECK17-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !18 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[ADD11:%.*]] = add i32 [[TMP30]], 1 // CHECK17-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: -// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD12:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK17-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD13:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD13:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK17-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK17-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK17-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 -2147483522, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5484,121 +5648,127 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK17-NEXT: store i64 [[I]], i64* [[I_ADDR]], align 8 // CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[I_ADDR]] to i8* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_ADDR]] to i8* -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8*, i8*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i8* [[CONV]], i8* [[CONV1]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i8* [[CONV]], i8** [[TMP0]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i8* [[CONV1]], i8** [[TMP1]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[I:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[I_ADDR:%.*]] = alloca i8*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i8*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[I4:%.*]] = alloca i8, align 1 +// CHECK17-NEXT: [[I:%.*]] = alloca i8, align 1 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[I6:%.*]] = alloca i8, align 1 +// CHECK17-NEXT: [[I5:%.*]] = alloca i8, align 1 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i8* [[I]], i8** [[I_ADDR]], align 8 -// CHECK17-NEXT: store i8* [[A]], i8** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i8*, i8** [[I_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i8*, i8** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 -// CHECK17-NEXT: store i8 [[TMP2]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK17-NEXT: [[TMP3:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK17-NEXT: [[CONV:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK17-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i8*, i8** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i8*, i8** [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 1 +// CHECK17-NEXT: store i8 [[TMP5]], i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK17-NEXT: [[TMP6:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK17-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 // CHECK17-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] // CHECK17-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK17-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK17-NEXT: store i8 [[TMP4]], i8* [[I4]], align 1 -// CHECK17-NEXT: [[TMP5:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK17-NEXT: [[CONV5:%.*]] = sext i8 [[TMP5]] to i32 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV5]], 10 +// CHECK17-NEXT: [[TMP7:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK17-NEXT: store i8 [[TMP7]], i8* [[I]], align 1 +// CHECK17-NEXT: [[TMP8:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK17-NEXT: [[CONV4:%.*]] = sext i8 [[TMP8]] to i32 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 10 // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK17-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 -// CHECK17-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK17-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !21 -// CHECK17-NEXT: [[CONV9:%.*]] = sext i8 [[TMP16]] to i32 // CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 -// CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] -// CHECK17-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK17-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1, !llvm.access.group !21 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17: omp.inner.for.body: +// CHECK17-NEXT: [[TMP19:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !21 +// CHECK17-NEXT: [[CONV8:%.*]] = sext i8 [[TMP19]] to i32 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]] +// CHECK17-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8 +// CHECK17-NEXT: store i8 [[CONV10]], i8* [[I5]], align 1, !llvm.access.group !21 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK17-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK17-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK17-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK17-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK17-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP23:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK17-NEXT: [[CONV13:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK17-NEXT: [[TMP24:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK17-NEXT: [[CONV14:%.*]] = sext i8 [[TMP24]] to i32 -// CHECK17-NEXT: [[SUB15:%.*]] = sub i32 10, [[CONV14]] -// CHECK17-NEXT: [[SUB16:%.*]] = sub i32 [[SUB15]], 1 -// CHECK17-NEXT: [[ADD17:%.*]] = add i32 [[SUB16]], 1 -// CHECK17-NEXT: [[DIV18:%.*]] = udiv i32 [[ADD17]], 1 -// CHECK17-NEXT: [[MUL19:%.*]] = mul nsw i32 [[DIV18]], 1 -// CHECK17-NEXT: [[ADD20:%.*]] = add nsw i32 [[CONV13]], [[MUL19]] -// CHECK17-NEXT: [[CONV21:%.*]] = trunc i32 [[ADD20]] to i8 -// CHECK17-NEXT: store i8 [[CONV21]], i8* [[TMP0]], align 1 +// CHECK17-NEXT: [[TMP26:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK17-NEXT: [[CONV12:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK17-NEXT: [[TMP27:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK17-NEXT: [[CONV13:%.*]] = sext i8 [[TMP27]] to i32 +// CHECK17-NEXT: [[SUB14:%.*]] = sub i32 10, [[CONV13]] +// CHECK17-NEXT: [[SUB15:%.*]] = sub i32 [[SUB14]], 1 +// CHECK17-NEXT: [[ADD16:%.*]] = add i32 [[SUB15]], 1 +// CHECK17-NEXT: [[DIV17:%.*]] = udiv i32 [[ADD16]], 1 +// CHECK17-NEXT: [[MUL18:%.*]] = mul nsw i32 [[DIV17]], 1 +// CHECK17-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV12]], [[MUL18]] +// CHECK17-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 +// CHECK17-NEXT: store i8 [[CONV20]], i8* [[TMP2]], align 1 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK17: .omp.final.done: // CHECK17-NEXT: br label [[OMP_PRECOND_END]] @@ -5610,18 +5780,21 @@ // CHECK17-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i16*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i16* [[CONV]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i16* [[CONV]], i16** [[TMP0]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5631,73 +5804,75 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i16*, i16** [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i16, i16* [[TMP0]], align 2 -// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP2]], align 2 +// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK17-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK17-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK17-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK17-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK17-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK17-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 100, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5712,23 +5887,29 @@ // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca float*, align 4 // CHECK19-NEXT: [[C_ADDR:%.*]] = alloca float*, align 4 // CHECK19-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: store float* [[A]], float** [[A_ADDR]], align 4 // CHECK19-NEXT: store float* [[B]], float** [[B_ADDR]], align 4 // CHECK19-NEXT: store float* [[C]], float** [[C_ADDR]], align 4 // CHECK19-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined. to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[C:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca float**, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca float**, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca float**, align 4 -// CHECK19-NEXT: [[D_ADDR:%.*]] = alloca float**, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5738,80 +5919,82 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store float** [[A]], float*** [[A_ADDR]], align 4 -// CHECK19-NEXT: store float** [[B]], float*** [[B_ADDR]], align 4 -// CHECK19-NEXT: store float** [[C]], float*** [[C_ADDR]], align 4 -// CHECK19-NEXT: store float** [[D]], float*** [[D_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load float*, float** [[TMP0]], align 4 -// CHECK19-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[TMP4]], i32 16) ] +// CHECK19-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load float*, float** [[TMP2]], align 4 +// CHECK19-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[TMP9]], i32 16) ] // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 4571423 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 4571423 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 7 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP13]], i32 [[TMP14]] -// CHECK19-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP16]], i32 [[TMP17]] -// CHECK19-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[MUL3:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK19-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP19]], i32 [[TMP20]] -// CHECK19-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP21]] -// CHECK19-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP22]], i32 [[TMP23]] +// CHECK19-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP4]], align 4, !llvm.access.group !10 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP18]], i32 [[TMP19]] +// CHECK19-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK19-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP6]], align 4, !llvm.access.group !10 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK19-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP21]], i32 [[TMP22]] +// CHECK19-NEXT: [[TMP23:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !10 +// CHECK19-NEXT: [[MUL3:%.*]] = fmul float [[TMP20]], [[TMP23]] +// CHECK19-NEXT: [[TMP24:%.*]] = load float*, float** [[TMP8]], align 4, !llvm.access.group !10 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK19-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP24]], i32 [[TMP25]] +// CHECK19-NEXT: [[TMP26:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !10 +// CHECK19-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP26]] +// CHECK19-NEXT: [[TMP27:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !10 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP27]], i32 [[TMP28]] // CHECK19-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !10 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK19-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK19-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK19-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 32000001, i32* [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5826,23 +6009,29 @@ // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca float*, align 4 // CHECK19-NEXT: [[C_ADDR:%.*]] = alloca float*, align 4 // CHECK19-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: store float* [[A]], float** [[A_ADDR]], align 4 // CHECK19-NEXT: store float* [[B]], float** [[B_ADDR]], align 4 // CHECK19-NEXT: store float* [[C]], float** [[C_ADDR]], align 4 // CHECK19-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[C:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca float**, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca float**, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca float**, align 4 -// CHECK19-NEXT: [[D_ADDR:%.*]] = alloca float**, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5852,78 +6041,80 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store float** [[A]], float*** [[A_ADDR]], align 4 -// CHECK19-NEXT: store float** [[B]], float*** [[B_ADDR]], align 4 -// CHECK19-NEXT: store float** [[C]], float*** [[C_ADDR]], align 4 -// CHECK19-NEXT: store float** [[D]], float*** [[D_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK19-NEXT: store i32 [[SUB]], i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 [[TMP13]] -// CHECK19-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load float*, float** [[TMP2]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP15]], i32 [[TMP16]] -// CHECK19-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX2]], align 4 -// CHECK19-NEXT: [[MUL3:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK19-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP3]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP18]], i32 [[TMP19]] -// CHECK19-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX4]], align 4 -// CHECK19-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP20]] -// CHECK19-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP21]], i32 [[TMP22]] +// CHECK19-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP17]], i32 [[TMP18]] +// CHECK19-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP6]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 [[TMP21]] +// CHECK19-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX2]], align 4 +// CHECK19-NEXT: [[MUL3:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK19-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP23]], i32 [[TMP24]] +// CHECK19-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX4]], align 4 +// CHECK19-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP25]] +// CHECK19-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP26]], i32 [[TMP27]] // CHECK19-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK19-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK19-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 32, i32* [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5938,23 +6129,29 @@ // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca float*, align 4 // CHECK19-NEXT: [[C_ADDR:%.*]] = alloca float*, align 4 // CHECK19-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK19-NEXT: store float* [[A]], float** [[A_ADDR]], align 4 // CHECK19-NEXT: store float* [[B]], float** [[B_ADDR]], align 4 // CHECK19-NEXT: store float* [[C]], float** [[C_ADDR]], align 4 // CHECK19-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..2 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[C:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca float**, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca float**, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca float**, align 4 -// CHECK19-NEXT: [[D_ADDR:%.*]] = alloca float**, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5964,95 +6161,97 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store float** [[A]], float*** [[A_ADDR]], align 4 -// CHECK19-NEXT: store float** [[B]], float*** [[B_ADDR]], align 4 -// CHECK19-NEXT: store float** [[C]], float*** [[C_ADDR]], align 4 -// CHECK19-NEXT: store float** [[D]], float*** [[D_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 16908288, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK19-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK19-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i32 [[TMP15]] -// CHECK19-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP17]], i32 [[TMP18]] -// CHECK19-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK19-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 [[TMP21]] -// CHECK19-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] -// CHECK19-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP23]], i32 [[TMP24]] +// CHECK19-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP4]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP19]], i32 [[TMP20]] +// CHECK19-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP6]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP22]], i32 [[TMP23]] +// CHECK19-NEXT: [[TMP24:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[MUL4:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK19-NEXT: [[TMP25:%.*]] = load float*, float** [[TMP8]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP25]], i32 [[TMP26]] +// CHECK19-NEXT: [[TMP27:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP27]] +// CHECK19-NEXT: [[TMP28:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP28]], i32 [[TMP29]] // CHECK19-NEXT: store float [[MUL6]], float* [[ARRAYIDX7]], align 4, !llvm.access.group !19 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[ADD8:%.*]] = add i32 [[TMP25]], 1 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[ADD8:%.*]] = add i32 [[TMP30]], 1 // CHECK19-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: -// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD9:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD9:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK19-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD10:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD10:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK19-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK19-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK19-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 -2147483522, i32* [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6065,121 +6264,127 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK19-NEXT: store i32 [[I]], i32* [[I_ADDR]], align 4 // CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[I_ADDR]] to i8* // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[A_ADDR]] to i8* -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8*, i8*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i8* [[CONV]], i8* [[CONV1]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i8* [[CONV]], i8** [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i8* [[CONV1]], i8** [[TMP1]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[I:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR0]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[I_ADDR:%.*]] = alloca i8*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i8*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[I4:%.*]] = alloca i8, align 1 +// CHECK19-NEXT: [[I:%.*]] = alloca i8, align 1 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[I6:%.*]] = alloca i8, align 1 +// CHECK19-NEXT: [[I5:%.*]] = alloca i8, align 1 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i8* [[I]], i8** [[I_ADDR]], align 4 -// CHECK19-NEXT: store i8* [[A]], i8** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i8*, i8** [[I_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i8*, i8** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 -// CHECK19-NEXT: store i8 [[TMP2]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK19-NEXT: [[TMP3:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK19-NEXT: [[CONV:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK19-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i8*, i8** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i8*, i8** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 1 +// CHECK19-NEXT: store i8 [[TMP5]], i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK19-NEXT: [[TMP6:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK19-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 // CHECK19-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] // CHECK19-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK19-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK19-NEXT: store i8 [[TMP4]], i8* [[I4]], align 1 -// CHECK19-NEXT: [[TMP5:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK19-NEXT: [[CONV5:%.*]] = sext i8 [[TMP5]] to i32 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV5]], 10 +// CHECK19-NEXT: [[TMP7:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK19-NEXT: store i8 [[TMP7]], i8* [[I]], align 1 +// CHECK19-NEXT: [[TMP8:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK19-NEXT: [[CONV4:%.*]] = sext i8 [[TMP8]] to i32 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 10 // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK19-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK19-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 -// CHECK19-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK19-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !22 -// CHECK19-NEXT: [[CONV9:%.*]] = sext i8 [[TMP16]] to i32 // CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 -// CHECK19-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] -// CHECK19-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK19-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1, !llvm.access.group !22 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK19-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK19: omp.inner.for.body: +// CHECK19-NEXT: [[TMP19:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !22 +// CHECK19-NEXT: [[CONV8:%.*]] = sext i8 [[TMP19]] to i32 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]] +// CHECK19-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8 +// CHECK19-NEXT: store i8 [[CONV10]], i8* [[I5]], align 1, !llvm.access.group !22 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK19-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK19-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK19-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK19-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK19-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP23:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK19-NEXT: [[CONV13:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK19-NEXT: [[TMP24:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK19-NEXT: [[CONV14:%.*]] = sext i8 [[TMP24]] to i32 -// CHECK19-NEXT: [[SUB15:%.*]] = sub i32 10, [[CONV14]] -// CHECK19-NEXT: [[SUB16:%.*]] = sub i32 [[SUB15]], 1 -// CHECK19-NEXT: [[ADD17:%.*]] = add i32 [[SUB16]], 1 -// CHECK19-NEXT: [[DIV18:%.*]] = udiv i32 [[ADD17]], 1 -// CHECK19-NEXT: [[MUL19:%.*]] = mul nsw i32 [[DIV18]], 1 -// CHECK19-NEXT: [[ADD20:%.*]] = add nsw i32 [[CONV13]], [[MUL19]] -// CHECK19-NEXT: [[CONV21:%.*]] = trunc i32 [[ADD20]] to i8 -// CHECK19-NEXT: store i8 [[CONV21]], i8* [[TMP0]], align 1 +// CHECK19-NEXT: [[TMP26:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK19-NEXT: [[CONV12:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK19-NEXT: [[TMP27:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK19-NEXT: [[CONV13:%.*]] = sext i8 [[TMP27]] to i32 +// CHECK19-NEXT: [[SUB14:%.*]] = sub i32 10, [[CONV13]] +// CHECK19-NEXT: [[SUB15:%.*]] = sub i32 [[SUB14]], 1 +// CHECK19-NEXT: [[ADD16:%.*]] = add i32 [[SUB15]], 1 +// CHECK19-NEXT: [[DIV17:%.*]] = udiv i32 [[ADD16]], 1 +// CHECK19-NEXT: [[MUL18:%.*]] = mul nsw i32 [[DIV17]], 1 +// CHECK19-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV12]], [[MUL18]] +// CHECK19-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 +// CHECK19-NEXT: store i8 [[CONV20]], i8* [[TMP2]], align 1 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK19: .omp.final.done: // CHECK19-NEXT: br label [[OMP_PRECOND_END]] @@ -6191,18 +6396,21 @@ // CHECK19-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i16*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i16* [[CONV]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i16* [[CONV]], i16** [[TMP0]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6212,73 +6420,75 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16*, i16** [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i16, i16* [[TMP0]], align 2 -// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP2]], align 2 +// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 -// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK19-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK19-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK19-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK19-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK19-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK19-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 100, i32* [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6293,23 +6503,29 @@ // CHECK21-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK21-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK21-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK21-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK21-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK21-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK21-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined. to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK21-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK21-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK21-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR0]] { +// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK21-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK21-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK21-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6319,84 +6535,86 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK21-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK21-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK21-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 -// CHECK21-NEXT: [[TMP4:%.*]] = load float*, float** [[TMP0]], align 8 -// CHECK21-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[TMP4]], i64 16) ] +// CHECK21-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK21-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 +// CHECK21-NEXT: [[TMP9:%.*]] = load float*, float** [[TMP2]], align 8 +// CHECK21-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[TMP9]], i64 16) ] // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 4571423 +// CHECK21-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 4571423 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK21-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 +// CHECK21-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 7 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK21-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !9 -// CHECK21-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK21-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 [[IDXPROM]] -// CHECK21-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !9 -// CHECK21-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !9 -// CHECK21-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK21-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK21-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[IDXPROM2]] -// CHECK21-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !9 -// CHECK21-NEXT: [[MUL4:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK21-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !9 -// CHECK21-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK21-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK21-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[IDXPROM5]] -// CHECK21-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !llvm.access.group !9 -// CHECK21-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP21]] -// CHECK21-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !9 -// CHECK21-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK21-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK21-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[IDXPROM8]] +// CHECK21-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP4]], align 8, !llvm.access.group !9 +// CHECK21-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK21-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM]] +// CHECK21-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK21-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP6]], align 8, !llvm.access.group !9 +// CHECK21-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK21-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK21-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM2]] +// CHECK21-NEXT: [[TMP23:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !9 +// CHECK21-NEXT: [[MUL4:%.*]] = fmul float [[TMP20]], [[TMP23]] +// CHECK21-NEXT: [[TMP24:%.*]] = load float*, float** [[TMP8]], align 8, !llvm.access.group !9 +// CHECK21-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK21-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK21-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP24]], i64 [[IDXPROM5]] +// CHECK21-NEXT: [[TMP26:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !llvm.access.group !9 +// CHECK21-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP26]] +// CHECK21-NEXT: [[TMP27:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !9 +// CHECK21-NEXT: [[TMP28:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK21-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK21-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP27]], i64 [[IDXPROM8]] // CHECK21-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4, !llvm.access.group !9 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK21-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK21-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK21-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK21-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) -// CHECK21-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK21-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) +// CHECK21-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK21-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 32000001, i32* [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6411,23 +6629,29 @@ // CHECK21-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK21-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK21-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK21-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK21-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK21-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK21-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK21-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK21-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK21-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR0]] { +// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK21-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK21-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK21-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6437,82 +6661,84 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK21-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK21-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK21-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 +// CHECK21-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK21-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK21-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK21-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK21-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK21-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK21-NEXT: store i32 [[SUB]], i32* [[I]], align 4 -// CHECK21-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 8, !nontemporal !16 -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK21-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP12]], i64 [[IDXPROM]] -// CHECK21-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK21-NEXT: [[TMP15:%.*]] = load float*, float** [[TMP2]], align 8 -// CHECK21-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK21-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK21-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP15]], i64 [[IDXPROM2]] -// CHECK21-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX3]], align 4 -// CHECK21-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK21-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP3]], align 8 -// CHECK21-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK21-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK21-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM5]] -// CHECK21-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX6]], align 4 -// CHECK21-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK21-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 8, !nontemporal !16 -// CHECK21-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK21-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK21-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM8]] +// CHECK21-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP4]], align 8, !nontemporal !16 +// CHECK21-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK21-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM]] +// CHECK21-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK21-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP6]], align 8 +// CHECK21-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK21-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK21-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM2]] +// CHECK21-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK21-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK21-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP8]], align 8 +// CHECK21-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK21-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK21-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM5]] +// CHECK21-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX6]], align 4 +// CHECK21-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK21-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP2]], align 8, !nontemporal !16 +// CHECK21-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK21-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK21-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP26]], i64 [[IDXPROM8]] // CHECK21-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK21-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK21-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK21-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK21-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK21-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK21-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 32, i32* [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6527,23 +6753,29 @@ // CHECK21-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK21-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK21-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK21-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK21-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK21-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK21-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..2 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK21-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK21-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK21-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR0]] { +// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK21-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK21-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK21-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6553,99 +6785,101 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK21-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK21-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK21-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 +// CHECK21-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK21-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 16908288, i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK21-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK21-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK21: omp.dispatch.cond: -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK21: omp.dispatch.body: // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 -// CHECK21-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK21-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK21-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 +// CHECK21-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK21-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK21-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK21-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK21-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK21-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 -// CHECK21-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !19 -// CHECK21-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 -// CHECK21-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM]] -// CHECK21-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !19 -// CHECK21-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !19 -// CHECK21-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 -// CHECK21-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK21-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM3]] -// CHECK21-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !19 -// CHECK21-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK21-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !19 -// CHECK21-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 -// CHECK21-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK21-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM6]] -// CHECK21-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !19 -// CHECK21-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK21-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !19 -// CHECK21-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 -// CHECK21-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK21-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM9]] +// CHECK21-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP4]], align 8, !llvm.access.group !19 +// CHECK21-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 +// CHECK21-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[IDXPROM]] +// CHECK21-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !19 +// CHECK21-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP6]], align 8, !llvm.access.group !19 +// CHECK21-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 +// CHECK21-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK21-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[IDXPROM3]] +// CHECK21-NEXT: [[TMP24:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !19 +// CHECK21-NEXT: [[MUL5:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK21-NEXT: [[TMP25:%.*]] = load float*, float** [[TMP8]], align 8, !llvm.access.group !19 +// CHECK21-NEXT: [[TMP26:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 +// CHECK21-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK21-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP25]], i64 [[IDXPROM6]] +// CHECK21-NEXT: [[TMP27:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !19 +// CHECK21-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP27]] +// CHECK21-NEXT: [[TMP28:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !19 +// CHECK21-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 +// CHECK21-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP29]] to i64 +// CHECK21-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP28]], i64 [[IDXPROM9]] // CHECK21-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !19 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK21-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 +// CHECK21-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK21-NEXT: [[ADD11:%.*]] = add i32 [[TMP30]], 1 // CHECK21-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK21: omp.dispatch.inc: -// CHECK21-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK21-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK21-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK21-NEXT: [[ADD12:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK21-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK21-NEXT: [[ADD13:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK21-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK21-NEXT: [[ADD13:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK21-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK21: omp.dispatch.end: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK21-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK21-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK21-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK21-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 -2147483522, i32* [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6658,152 +6892,158 @@ // CHECK21-NEXT: entry: // CHECK21-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK21-NEXT: store i64 [[I]], i64* [[I_ADDR]], align 8 // CHECK21-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[I_ADDR]] to i8* // CHECK21-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_ADDR]] to i8* -// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8*, i8*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i8* [[CONV]], i8* [[CONV1]]) +// CHECK21-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: store i8* [[CONV]], i8** [[TMP0]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: store i8* [[CONV1]], i8** [[TMP1]], align 8 +// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[I:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR0]] { +// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK21-NEXT: [[I_ADDR:%.*]] = alloca i8*, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i8*, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK21-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK21-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK21-NEXT: [[I4:%.*]] = alloca i8, align 1 +// CHECK21-NEXT: [[I:%.*]] = alloca i8, align 1 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK21-NEXT: [[I6:%.*]] = alloca i8, align 1 +// CHECK21-NEXT: [[I5:%.*]] = alloca i8, align 1 // CHECK21-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i8* [[I]], i8** [[I_ADDR]], align 8 -// CHECK21-NEXT: store i8* [[A]], i8** [[A_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load i8*, i8** [[I_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i8*, i8** [[A_ADDR]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 -// CHECK21-NEXT: store i8 [[TMP2]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK21-NEXT: [[TMP3:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK21-NEXT: [[CONV:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK21-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i8*, i8** [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i8*, i8** [[TMP3]], align 8 +// CHECK21-NEXT: [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 1 +// CHECK21-NEXT: store i8 [[TMP5]], i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: [[TMP6:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 // CHECK21-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] // CHECK21-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK21-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK21-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK21-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK21-NEXT: store i8 [[TMP4]], i8* [[I4]], align 1 -// CHECK21-NEXT: [[TMP5:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK21-NEXT: [[CONV5:%.*]] = sext i8 [[TMP5]] to i32 -// CHECK21-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV5]], 10 +// CHECK21-NEXT: [[TMP7:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: store i8 [[TMP7]], i8* [[I]], align 1 +// CHECK21-NEXT: [[TMP8:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: [[CONV4:%.*]] = sext i8 [[TMP8]] to i32 +// CHECK21-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 10 // CHECK21-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK21: omp.precond.then: // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK21-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK21-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK21-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK21-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK21-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK21-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK21-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK21-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP14:%.*]] = load i8, i8* [[TMP1]], align 1 -// CHECK21-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP14]], 0 +// CHECK21-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP17:%.*]] = load i8, i8* [[TMP4]], align 1 +// CHECK21-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP17]], 0 // CHECK21-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK21: omp_if.then: // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK21-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 -// CHECK21-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK21-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP17:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !22 -// CHECK21-NEXT: [[CONV9:%.*]] = sext i8 [[TMP17]] to i32 // CHECK21-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK21-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] -// CHECK21-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK21-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1, !nontemporal !16, !llvm.access.group !22 +// CHECK21-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK21-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK21-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK21: omp.inner.for.body: +// CHECK21-NEXT: [[TMP20:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !22 +// CHECK21-NEXT: [[CONV8:%.*]] = sext i8 [[TMP20]] to i32 +// CHECK21-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK21-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]] +// CHECK21-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8 +// CHECK21-NEXT: store i8 [[CONV10]], i8* [[I5]], align 1, !nontemporal !16, !llvm.access.group !22 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK21-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK21-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK21-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK21-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK21-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_IF_END:%.*]] // CHECK21: omp_if.else: -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND13:%.*]] -// CHECK21: omp.inner.for.cond13: -// CHECK21-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP14:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK21-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY15:%.*]], label [[OMP_INNER_FOR_END23:%.*]] -// CHECK21: omp.inner.for.body15: -// CHECK21-NEXT: [[TMP22:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK21-NEXT: [[CONV16:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND12:%.*]] +// CHECK21: omp.inner.for.cond12: // CHECK21-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP23]], 1 -// CHECK21-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV16]], [[MUL17]] -// CHECK21-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK21-NEXT: store i8 [[CONV19]], i8* [[I6]], align 1 -// CHECK21-NEXT: br label [[OMP_BODY_CONTINUE20:%.*]] -// CHECK21: omp.body.continue20: -// CHECK21-NEXT: br label [[OMP_INNER_FOR_INC21:%.*]] -// CHECK21: omp.inner.for.inc21: -// CHECK21-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK21-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP25:![0-9]+]] -// CHECK21: omp.inner.for.end23: +// CHECK21-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP13:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK21-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY14:%.*]], label [[OMP_INNER_FOR_END22:%.*]] +// CHECK21: omp.inner.for.body14: +// CHECK21-NEXT: [[TMP25:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: [[CONV15:%.*]] = sext i8 [[TMP25]] to i32 +// CHECK21-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[MUL16:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK21-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV15]], [[MUL16]] +// CHECK21-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK21-NEXT: store i8 [[CONV18]], i8* [[I5]], align 1 +// CHECK21-NEXT: br label [[OMP_BODY_CONTINUE19:%.*]] +// CHECK21: omp.body.continue19: +// CHECK21-NEXT: br label [[OMP_INNER_FOR_INC20:%.*]] +// CHECK21: omp.inner.for.inc20: +// CHECK21-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK21-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND12]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK21: omp.inner.for.end22: // CHECK21-NEXT: br label [[OMP_IF_END]] // CHECK21: omp_if.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK21-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK21-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP29]]) +// CHECK21-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK21-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: -// CHECK21-NEXT: [[TMP29:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK21-NEXT: [[CONV24:%.*]] = sext i8 [[TMP29]] to i32 -// CHECK21-NEXT: [[TMP30:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK21-NEXT: [[CONV25:%.*]] = sext i8 [[TMP30]] to i32 -// CHECK21-NEXT: [[SUB26:%.*]] = sub i32 10, [[CONV25]] -// CHECK21-NEXT: [[SUB27:%.*]] = sub i32 [[SUB26]], 1 -// CHECK21-NEXT: [[ADD28:%.*]] = add i32 [[SUB27]], 1 -// CHECK21-NEXT: [[DIV29:%.*]] = udiv i32 [[ADD28]], 1 -// CHECK21-NEXT: [[MUL30:%.*]] = mul nsw i32 [[DIV29]], 1 -// CHECK21-NEXT: [[ADD31:%.*]] = add nsw i32 [[CONV24]], [[MUL30]] -// CHECK21-NEXT: [[CONV32:%.*]] = trunc i32 [[ADD31]] to i8 -// CHECK21-NEXT: store i8 [[CONV32]], i8* [[TMP0]], align 1 +// CHECK21-NEXT: [[TMP32:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: [[CONV23:%.*]] = sext i8 [[TMP32]] to i32 +// CHECK21-NEXT: [[TMP33:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: [[CONV24:%.*]] = sext i8 [[TMP33]] to i32 +// CHECK21-NEXT: [[SUB25:%.*]] = sub i32 10, [[CONV24]] +// CHECK21-NEXT: [[SUB26:%.*]] = sub i32 [[SUB25]], 1 +// CHECK21-NEXT: [[ADD27:%.*]] = add i32 [[SUB26]], 1 +// CHECK21-NEXT: [[DIV28:%.*]] = udiv i32 [[ADD27]], 1 +// CHECK21-NEXT: [[MUL29:%.*]] = mul nsw i32 [[DIV28]], 1 +// CHECK21-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV23]], [[MUL29]] +// CHECK21-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 +// CHECK21-NEXT: store i8 [[CONV31]], i8* [[TMP2]], align 1 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK21: .omp.final.done: // CHECK21-NEXT: br label [[OMP_PRECOND_END]] @@ -6815,18 +7055,21 @@ // CHECK21-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK21-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i16*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i16* [[CONV]]) +// CHECK21-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: store i16* [[CONV]], i16** [[TMP0]], align 8 +// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0]] { +// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6836,73 +7079,75 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 +// CHECK21-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i16*, i16** [[TMP1]], align 8 // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP1:%.*]] = load i16, i16* [[TMP0]], align 2 -// CHECK21-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK21-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK21-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP2]], align 2 +// CHECK21-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK21-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK21-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK21: omp.dispatch.cond: -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK21-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK21: omp.dispatch.body: // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 -// CHECK21-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK21-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK21-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK21-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK21-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK21: omp.dispatch.inc: -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK21-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK21-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK21-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK21-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK21-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK21-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK21-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK21-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK21: omp.dispatch.end: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK21-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK21-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK21-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK21-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 100, i32* [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6917,23 +7162,29 @@ // CHECK23-NEXT: [[B_ADDR:%.*]] = alloca float*, align 4 // CHECK23-NEXT: [[C_ADDR:%.*]] = alloca float*, align 4 // CHECK23-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK23-NEXT: store float* [[A]], float** [[A_ADDR]], align 4 // CHECK23-NEXT: store float* [[B]], float** [[B_ADDR]], align 4 // CHECK23-NEXT: store float* [[C]], float** [[C_ADDR]], align 4 // CHECK23-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 -// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined. to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK23-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK23-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 4 +// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[C:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { +// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca float**, align 4 -// CHECK23-NEXT: [[B_ADDR:%.*]] = alloca float**, align 4 -// CHECK23-NEXT: [[C_ADDR:%.*]] = alloca float**, align 4 -// CHECK23-NEXT: [[D_ADDR:%.*]] = alloca float**, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6943,80 +7194,82 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store float** [[A]], float*** [[A_ADDR]], align 4 -// CHECK23-NEXT: store float** [[B]], float*** [[B_ADDR]], align 4 -// CHECK23-NEXT: store float** [[C]], float*** [[C_ADDR]], align 4 -// CHECK23-NEXT: store float** [[D]], float*** [[D_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load float*, float** [[TMP0]], align 4 -// CHECK23-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[TMP4]], i32 16) ] +// CHECK23-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK23-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = load float*, float** [[TMP2]], align 4 +// CHECK23-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[TMP9]], i32 16) ] // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 4571423 +// CHECK23-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 4571423 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK23-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 +// CHECK23-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 7 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK23-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 -// CHECK23-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !10 -// CHECK23-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP13]], i32 [[TMP14]] -// CHECK23-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !10 -// CHECK23-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !10 -// CHECK23-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK23-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP16]], i32 [[TMP17]] -// CHECK23-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !10 -// CHECK23-NEXT: [[MUL3:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK23-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !10 -// CHECK23-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK23-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP19]], i32 [[TMP20]] -// CHECK23-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !10 -// CHECK23-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP21]] -// CHECK23-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !10 -// CHECK23-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK23-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP22]], i32 [[TMP23]] +// CHECK23-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP4]], align 4, !llvm.access.group !10 +// CHECK23-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP18]], i32 [[TMP19]] +// CHECK23-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK23-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP6]], align 4, !llvm.access.group !10 +// CHECK23-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK23-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP21]], i32 [[TMP22]] +// CHECK23-NEXT: [[TMP23:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !10 +// CHECK23-NEXT: [[MUL3:%.*]] = fmul float [[TMP20]], [[TMP23]] +// CHECK23-NEXT: [[TMP24:%.*]] = load float*, float** [[TMP8]], align 4, !llvm.access.group !10 +// CHECK23-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK23-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP24]], i32 [[TMP25]] +// CHECK23-NEXT: [[TMP26:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !10 +// CHECK23-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP26]] +// CHECK23-NEXT: [[TMP27:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !10 +// CHECK23-NEXT: [[TMP28:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK23-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP27]], i32 [[TMP28]] // CHECK23-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !10 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK23-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK23-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK23-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK23-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) -// CHECK23-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK23-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) +// CHECK23-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK23-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 32000001, i32* [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7031,23 +7284,29 @@ // CHECK23-NEXT: [[B_ADDR:%.*]] = alloca float*, align 4 // CHECK23-NEXT: [[C_ADDR:%.*]] = alloca float*, align 4 // CHECK23-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK23-NEXT: store float* [[A]], float** [[A_ADDR]], align 4 // CHECK23-NEXT: store float* [[B]], float** [[B_ADDR]], align 4 // CHECK23-NEXT: store float* [[C]], float** [[C_ADDR]], align 4 // CHECK23-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 -// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK23-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK23-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 4 +// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[C:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { +// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca float**, align 4 -// CHECK23-NEXT: [[B_ADDR:%.*]] = alloca float**, align 4 -// CHECK23-NEXT: [[C_ADDR:%.*]] = alloca float**, align 4 -// CHECK23-NEXT: [[D_ADDR:%.*]] = alloca float**, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7057,78 +7316,80 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store float** [[A]], float*** [[A_ADDR]], align 4 -// CHECK23-NEXT: store float** [[B]], float*** [[B_ADDR]], align 4 -// CHECK23-NEXT: store float** [[C]], float*** [[C_ADDR]], align 4 -// CHECK23-NEXT: store float** [[D]], float*** [[D_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 4 +// CHECK23-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK23-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 4 // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK23-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK23-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK23-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK23-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK23-NEXT: store i32 [[SUB]], i32* [[I]], align 4 -// CHECK23-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 4, !nontemporal !17 -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 [[TMP13]] -// CHECK23-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK23-NEXT: [[TMP15:%.*]] = load float*, float** [[TMP2]], align 4 -// CHECK23-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK23-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP15]], i32 [[TMP16]] -// CHECK23-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX2]], align 4 -// CHECK23-NEXT: [[MUL3:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK23-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP3]], align 4 -// CHECK23-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK23-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP18]], i32 [[TMP19]] -// CHECK23-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX4]], align 4 -// CHECK23-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP20]] -// CHECK23-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 4, !nontemporal !17 -// CHECK23-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK23-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP21]], i32 [[TMP22]] +// CHECK23-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP4]], align 4, !nontemporal !17 +// CHECK23-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP17]], i32 [[TMP18]] +// CHECK23-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK23-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP6]], align 4 +// CHECK23-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK23-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 [[TMP21]] +// CHECK23-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX2]], align 4 +// CHECK23-NEXT: [[MUL3:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK23-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP8]], align 4 +// CHECK23-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK23-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP23]], i32 [[TMP24]] +// CHECK23-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX4]], align 4 +// CHECK23-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP25]] +// CHECK23-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP2]], align 4, !nontemporal !17 +// CHECK23-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK23-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP26]], i32 [[TMP27]] // CHECK23-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK23-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK23-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK23-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK23-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK23-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK23-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 32, i32* [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7143,23 +7404,29 @@ // CHECK23-NEXT: [[B_ADDR:%.*]] = alloca float*, align 4 // CHECK23-NEXT: [[C_ADDR:%.*]] = alloca float*, align 4 // CHECK23-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK23-NEXT: store float* [[A]], float** [[A_ADDR]], align 4 // CHECK23-NEXT: store float* [[B]], float** [[B_ADDR]], align 4 // CHECK23-NEXT: store float* [[C]], float** [[C_ADDR]], align 4 // CHECK23-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 -// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..2 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK23-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK23-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 4 +// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[A:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[B:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[C:%.*]], float** noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { +// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca float**, align 4 -// CHECK23-NEXT: [[B_ADDR:%.*]] = alloca float**, align 4 -// CHECK23-NEXT: [[C_ADDR:%.*]] = alloca float**, align 4 -// CHECK23-NEXT: [[D_ADDR:%.*]] = alloca float**, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7169,95 +7436,97 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store float** [[A]], float*** [[A_ADDR]], align 4 -// CHECK23-NEXT: store float** [[B]], float*** [[B_ADDR]], align 4 -// CHECK23-NEXT: store float** [[C]], float*** [[C_ADDR]], align 4 -// CHECK23-NEXT: store float** [[D]], float*** [[D_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 4 +// CHECK23-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK23-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 4 // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 16908288, i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK23-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK23-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK23: omp.dispatch.cond: -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK23: omp.dispatch.body: // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 -// CHECK23-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK23-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK23-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 +// CHECK23-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK23-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK23-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK23-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK23-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK23-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK23-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !20 -// CHECK23-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !20 -// CHECK23-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 -// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i32 [[TMP15]] -// CHECK23-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !20 -// CHECK23-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !20 -// CHECK23-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 -// CHECK23-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP17]], i32 [[TMP18]] -// CHECK23-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !20 -// CHECK23-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK23-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !20 -// CHECK23-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 -// CHECK23-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 [[TMP21]] -// CHECK23-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !20 -// CHECK23-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] -// CHECK23-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !20 -// CHECK23-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 -// CHECK23-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP23]], i32 [[TMP24]] +// CHECK23-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP4]], align 4, !llvm.access.group !20 +// CHECK23-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 +// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP19]], i32 [[TMP20]] +// CHECK23-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !20 +// CHECK23-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP6]], align 4, !llvm.access.group !20 +// CHECK23-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 +// CHECK23-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP22]], i32 [[TMP23]] +// CHECK23-NEXT: [[TMP24:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !20 +// CHECK23-NEXT: [[MUL4:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK23-NEXT: [[TMP25:%.*]] = load float*, float** [[TMP8]], align 4, !llvm.access.group !20 +// CHECK23-NEXT: [[TMP26:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 +// CHECK23-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP25]], i32 [[TMP26]] +// CHECK23-NEXT: [[TMP27:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !20 +// CHECK23-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP27]] +// CHECK23-NEXT: [[TMP28:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !20 +// CHECK23-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 +// CHECK23-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP28]], i32 [[TMP29]] // CHECK23-NEXT: store float [[MUL6]], float* [[ARRAYIDX7]], align 4, !llvm.access.group !20 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK23-NEXT: [[ADD8:%.*]] = add i32 [[TMP25]], 1 +// CHECK23-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK23-NEXT: [[ADD8:%.*]] = add i32 [[TMP30]], 1 // CHECK23-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK23: omp.dispatch.inc: -// CHECK23-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK23-NEXT: [[ADD9:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK23-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK23-NEXT: [[ADD9:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK23-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK23-NEXT: [[ADD10:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK23-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK23-NEXT: [[ADD10:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK23-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK23: omp.dispatch.end: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK23-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK23-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK23-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK23-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 -2147483522, i32* [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7270,152 +7539,158 @@ // CHECK23-NEXT: entry: // CHECK23-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK23-NEXT: store i32 [[I]], i32* [[I_ADDR]], align 4 // CHECK23-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK23-NEXT: [[CONV:%.*]] = bitcast i32* [[I_ADDR]] to i8* // CHECK23-NEXT: [[CONV1:%.*]] = bitcast i32* [[A_ADDR]] to i8* -// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8*, i8*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i8* [[CONV]], i8* [[CONV1]]) +// CHECK23-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: store i8* [[CONV]], i8** [[TMP0]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: store i8* [[CONV1]], i8** [[TMP1]], align 4 +// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[I:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR0]] { +// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK23-NEXT: [[I_ADDR:%.*]] = alloca i8*, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i8*, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK23-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK23-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[I4:%.*]] = alloca i8, align 1 +// CHECK23-NEXT: [[I:%.*]] = alloca i8, align 1 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[I6:%.*]] = alloca i8, align 1 +// CHECK23-NEXT: [[I5:%.*]] = alloca i8, align 1 // CHECK23-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store i8* [[I]], i8** [[I_ADDR]], align 4 -// CHECK23-NEXT: store i8* [[A]], i8** [[A_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load i8*, i8** [[I_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i8*, i8** [[A_ADDR]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 -// CHECK23-NEXT: store i8 [[TMP2]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK23-NEXT: [[TMP3:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK23-NEXT: [[CONV:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK23-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i8*, i8** [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i8*, i8** [[TMP3]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 1 +// CHECK23-NEXT: store i8 [[TMP5]], i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK23-NEXT: [[TMP6:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK23-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 // CHECK23-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] // CHECK23-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK23-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK23-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK23-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK23-NEXT: store i8 [[TMP4]], i8* [[I4]], align 1 -// CHECK23-NEXT: [[TMP5:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK23-NEXT: [[CONV5:%.*]] = sext i8 [[TMP5]] to i32 -// CHECK23-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV5]], 10 +// CHECK23-NEXT: [[TMP7:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK23-NEXT: store i8 [[TMP7]], i8* [[I]], align 1 +// CHECK23-NEXT: [[TMP8:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK23-NEXT: [[CONV4:%.*]] = sext i8 [[TMP8]] to i32 +// CHECK23-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 10 // CHECK23-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK23: omp.precond.then: // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK23-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK23-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK23-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK23-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK23-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK23-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK23-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK23-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP14:%.*]] = load i8, i8* [[TMP1]], align 1 -// CHECK23-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP14]], 0 +// CHECK23-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP17:%.*]] = load i8, i8* [[TMP4]], align 1 +// CHECK23-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP17]], 0 // CHECK23-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK23: omp_if.then: // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK23-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 -// CHECK23-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK23-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP17:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !23 -// CHECK23-NEXT: [[CONV9:%.*]] = sext i8 [[TMP17]] to i32 // CHECK23-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK23-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] -// CHECK23-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK23-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1, !nontemporal !17, !llvm.access.group !23 +// CHECK23-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 +// CHECK23-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK23-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK23: omp.inner.for.body: +// CHECK23-NEXT: [[TMP20:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !23 +// CHECK23-NEXT: [[CONV8:%.*]] = sext i8 [[TMP20]] to i32 +// CHECK23-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK23-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]] +// CHECK23-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8 +// CHECK23-NEXT: store i8 [[CONV10]], i8* [[I5]], align 1, !nontemporal !17, !llvm.access.group !23 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK23-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK23-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK23-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK23-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK23-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_IF_END:%.*]] // CHECK23: omp_if.else: -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND13:%.*]] -// CHECK23: omp.inner.for.cond13: -// CHECK23-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP14:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK23-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY15:%.*]], label [[OMP_INNER_FOR_END23:%.*]] -// CHECK23: omp.inner.for.body15: -// CHECK23-NEXT: [[TMP22:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK23-NEXT: [[CONV16:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND12:%.*]] +// CHECK23: omp.inner.for.cond12: // CHECK23-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP23]], 1 -// CHECK23-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV16]], [[MUL17]] -// CHECK23-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK23-NEXT: store i8 [[CONV19]], i8* [[I6]], align 1 -// CHECK23-NEXT: br label [[OMP_BODY_CONTINUE20:%.*]] -// CHECK23: omp.body.continue20: -// CHECK23-NEXT: br label [[OMP_INNER_FOR_INC21:%.*]] -// CHECK23: omp.inner.for.inc21: -// CHECK23-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK23-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP26:![0-9]+]] -// CHECK23: omp.inner.for.end23: +// CHECK23-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP13:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK23-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY14:%.*]], label [[OMP_INNER_FOR_END22:%.*]] +// CHECK23: omp.inner.for.body14: +// CHECK23-NEXT: [[TMP25:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK23-NEXT: [[CONV15:%.*]] = sext i8 [[TMP25]] to i32 +// CHECK23-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[MUL16:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK23-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV15]], [[MUL16]] +// CHECK23-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK23-NEXT: store i8 [[CONV18]], i8* [[I5]], align 1 +// CHECK23-NEXT: br label [[OMP_BODY_CONTINUE19:%.*]] +// CHECK23: omp.body.continue19: +// CHECK23-NEXT: br label [[OMP_INNER_FOR_INC20:%.*]] +// CHECK23: omp.inner.for.inc20: +// CHECK23-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK23-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND12]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK23: omp.inner.for.end22: // CHECK23-NEXT: br label [[OMP_IF_END]] // CHECK23: omp_if.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK23-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK23-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP29]]) +// CHECK23-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK23-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: -// CHECK23-NEXT: [[TMP29:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK23-NEXT: [[CONV24:%.*]] = sext i8 [[TMP29]] to i32 -// CHECK23-NEXT: [[TMP30:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK23-NEXT: [[CONV25:%.*]] = sext i8 [[TMP30]] to i32 -// CHECK23-NEXT: [[SUB26:%.*]] = sub i32 10, [[CONV25]] -// CHECK23-NEXT: [[SUB27:%.*]] = sub i32 [[SUB26]], 1 -// CHECK23-NEXT: [[ADD28:%.*]] = add i32 [[SUB27]], 1 -// CHECK23-NEXT: [[DIV29:%.*]] = udiv i32 [[ADD28]], 1 -// CHECK23-NEXT: [[MUL30:%.*]] = mul nsw i32 [[DIV29]], 1 -// CHECK23-NEXT: [[ADD31:%.*]] = add nsw i32 [[CONV24]], [[MUL30]] -// CHECK23-NEXT: [[CONV32:%.*]] = trunc i32 [[ADD31]] to i8 -// CHECK23-NEXT: store i8 [[CONV32]], i8* [[TMP0]], align 1 +// CHECK23-NEXT: [[TMP32:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK23-NEXT: [[CONV23:%.*]] = sext i8 [[TMP32]] to i32 +// CHECK23-NEXT: [[TMP33:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK23-NEXT: [[CONV24:%.*]] = sext i8 [[TMP33]] to i32 +// CHECK23-NEXT: [[SUB25:%.*]] = sub i32 10, [[CONV24]] +// CHECK23-NEXT: [[SUB26:%.*]] = sub i32 [[SUB25]], 1 +// CHECK23-NEXT: [[ADD27:%.*]] = add i32 [[SUB26]], 1 +// CHECK23-NEXT: [[DIV28:%.*]] = udiv i32 [[ADD27]], 1 +// CHECK23-NEXT: [[MUL29:%.*]] = mul nsw i32 [[DIV28]], 1 +// CHECK23-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV23]], [[MUL29]] +// CHECK23-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 +// CHECK23-NEXT: store i8 [[CONV31]], i8* [[TMP2]], align 1 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK23: .omp.final.done: // CHECK23-NEXT: br label [[OMP_PRECOND_END]] @@ -7427,18 +7702,21 @@ // CHECK23-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK23-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK23-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i16*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i16* [[CONV]]) +// CHECK23-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: store i16* [[CONV]], i16** [[TMP0]], align 4 +// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0]] { +// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7448,73 +7726,75 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 +// CHECK23-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i16*, i16** [[TMP1]], align 4 // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i16, i16* [[TMP0]], align 2 -// CHECK23-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK23-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP2]], align 2 +// CHECK23-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK23-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK23-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK23: omp.dispatch.cond: -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK23: omp.dispatch.body: // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 -// CHECK23-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 +// CHECK23-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK23-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK23-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !28 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK23-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK23-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK23-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK23-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK23: omp.dispatch.inc: -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK23-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK23-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK23-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK23-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK23-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK23-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK23-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK23-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK23: omp.dispatch.end: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK23-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK23-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK23-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK23-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 100, i32* [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/distribute_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_simd_firstprivate_codegen.cpp --- a/clang/test/OpenMP/distribute_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_simd_firstprivate_codegen.cpp @@ -179,6 +179,7 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], i64* [[SVAR_ADDR]], align 8 @@ -188,20 +189,25 @@ // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SFVAR_ADDR]] to float* // CHECK1-NEXT: store double* [[CONV1]], double** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, double*, double*, i32*, float*)* @.omp_outlined. to void (i32*, i32*, ...)*), double* [[CONV]], double* [[TMP0]], i32* [[CONV2]], float* [[CONV3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store double* [[CONV]], double** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[TMP2]], double** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[CONV2]], i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[CONV3]], float** [[TMP4]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[G:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -210,107 +216,109 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store double* [[G]], double** [[G_ADDR]], align 8 -// CHECK1-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store float* [[SFVAR]], float** [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float*, float** [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store double* [[TMP1]], double** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: store double* [[TMP4]], double** [[_TMP1]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 8 +// CHECK1-NEXT: store double* [[TMP4]], double** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[TMP9]], double** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load volatile double, double* [[TMP0]], align 8 -// CHECK1-NEXT: store double [[TMP5]], double* [[G3]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load volatile double, double* [[TMP6]], align 8 -// CHECK1-NEXT: store double [[TMP7]], double* [[G14]], align 8 -// CHECK1-NEXT: store double* [[G14]], double** [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[SVAR6]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load float, float* [[TMP3]], align 4 -// CHECK1-NEXT: store float [[TMP9]], float* [[SFVAR7]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load volatile double, double* [[TMP2]], align 8 +// CHECK1-NEXT: store double [[TMP10]], double* [[G]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load double*, double** [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load volatile double, double* [[TMP11]], align 8 +// CHECK1-NEXT: store double [[TMP12]], double* [[G1]], align 8 +// CHECK1-NEXT: store double* [[G1]], double** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load float, float* [[TMP8]], align 4 +// CHECK1-NEXT: store float [[TMP14]], float* [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP18:%.*]] = load double, double* [[G3]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP18]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD9]], double* [[G3]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP19:%.*]] = load double*, double** [[_TMP5]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP20:%.*]] = load volatile double, double* [[TMP19]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[ADD10:%.*]] = fadd double [[TMP20]], 1.000000e+00 -// CHECK1-NEXT: store volatile double [[ADD10]], double* [[TMP19]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[SVAR6]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 3 -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[SVAR6]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP22:%.*]] = load float, float* [[SFVAR7]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP22]] to double -// CHECK1-NEXT: [[ADD12:%.*]] = fadd double [[CONV]], 4.000000e+00 -// CHECK1-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK1-NEXT: store float [[CONV13]], float* [[SFVAR7]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double* [[G3]], double** [[TMP23]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP25:%.*]] = load double*, double** [[_TMP5]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: store double* [[TMP25]], double** [[TMP24]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store i32* [[SVAR6]], i32** [[TMP26]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store float* [[SFVAR7]], float** [[TMP27]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP23:%.*]] = load double, double* [[G]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[ADD5:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD5]], double* [[G]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP24:%.*]] = load double*, double** [[_TMP3]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP25:%.*]] = load volatile double, double* [[TMP24]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[ADD6:%.*]] = fadd double [[TMP25]], 1.000000e+00 +// CHECK1-NEXT: store volatile double [[ADD6]], double* [[TMP24]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[SVAR]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], 3 +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[SVAR]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP27:%.*]] = load float, float* [[SFVAR]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP27]] to double +// CHECK1-NEXT: [[ADD8:%.*]] = fadd double [[CONV]], 4.000000e+00 +// CHECK1-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float +// CHECK1-NEXT: store float [[CONV9]], float* [[SFVAR]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store double* [[G]], double** [[TMP28]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load double*, double** [[_TMP3]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: store double* [[TMP30]], double** [[TMP29]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[SVAR]], i32** [[TMP31]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[SFVAR]], float** [[TMP32]], align 8, !llvm.access.group !4 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK1-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK1-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -354,6 +362,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 // CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], i32* [[SVAR_ADDR]], align 4 @@ -368,20 +377,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load volatile double, double* [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], double* [[G13]], align 8 // CHECK3-NEXT: store double* [[G13]], double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, double*, double*, i32*, float*)* @.omp_outlined. to void (i32*, i32*, ...)*), double* [[G2]], double* [[TMP5]], i32* [[SVAR_ADDR]], float* [[CONV]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G2]], double** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load double*, double** [[_TMP4]], align 4 +// CHECK3-NEXT: store double* [[TMP7]], double** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SVAR_ADDR]], i32** [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[CONV]], float** [[TMP9]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca float*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -390,107 +404,109 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP5:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP3:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 -// CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store float* [[SFVAR]], float** [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load float*, float** [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store double* [[TMP1]], double** [[TMP]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP]], align 4 -// CHECK3-NEXT: store double* [[TMP4]], double** [[_TMP1]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 4 +// CHECK3-NEXT: store double* [[TMP4]], double** [[TMP]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load double*, double** [[TMP]], align 4 +// CHECK3-NEXT: store double* [[TMP9]], double** [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load volatile double, double* [[TMP0]], align 8 -// CHECK3-NEXT: store double [[TMP5]], double* [[G3]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = load double*, double** [[_TMP1]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load volatile double, double* [[TMP6]], align 4 -// CHECK3-NEXT: store double [[TMP7]], double* [[G14]], align 8 -// CHECK3-NEXT: store double* [[G14]], double** [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[SVAR6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load float, float* [[TMP3]], align 4 -// CHECK3-NEXT: store float [[TMP9]], float* [[SFVAR7]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load volatile double, double* [[TMP2]], align 8 +// CHECK3-NEXT: store double [[TMP10]], double* [[G]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load double*, double** [[_TMP1]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load volatile double, double* [[TMP11]], align 4 +// CHECK3-NEXT: store double [[TMP12]], double* [[G1]], align 8 +// CHECK3-NEXT: store double* [[G1]], double** [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load float, float* [[TMP8]], align 4 +// CHECK3-NEXT: store float [[TMP14]], float* [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP18:%.*]] = load double, double* [[G3]], align 8, !llvm.access.group !5 -// CHECK3-NEXT: [[ADD9:%.*]] = fadd double [[TMP18]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD9]], double* [[G3]], align 8, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP19:%.*]] = load double*, double** [[_TMP5]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP20:%.*]] = load volatile double, double* [[TMP19]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[ADD10:%.*]] = fadd double [[TMP20]], 1.000000e+00 -// CHECK3-NEXT: store volatile double [[ADD10]], double* [[TMP19]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[SVAR6]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 3 -// CHECK3-NEXT: store i32 [[ADD11]], i32* [[SVAR6]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP22:%.*]] = load float, float* [[SFVAR7]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP22]] to double -// CHECK3-NEXT: [[ADD12:%.*]] = fadd double [[CONV]], 4.000000e+00 -// CHECK3-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK3-NEXT: store float [[CONV13]], float* [[SFVAR7]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double* [[G3]], double** [[TMP23]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP25:%.*]] = load double*, double** [[_TMP5]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: store double* [[TMP25]], double** [[TMP24]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store i32* [[SVAR6]], i32** [[TMP26]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store float* [[SFVAR7]], float** [[TMP27]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP23:%.*]] = load double, double* [[G]], align 8, !llvm.access.group !5 +// CHECK3-NEXT: [[ADD5:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD5]], double* [[G]], align 8, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP24:%.*]] = load double*, double** [[_TMP3]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP25:%.*]] = load volatile double, double* [[TMP24]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[ADD6:%.*]] = fadd double [[TMP25]], 1.000000e+00 +// CHECK3-NEXT: store volatile double [[ADD6]], double* [[TMP24]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[SVAR]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], 3 +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[SVAR]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP27:%.*]] = load float, float* [[SFVAR]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP27]] to double +// CHECK3-NEXT: [[ADD8:%.*]] = fadd double [[CONV]], 4.000000e+00 +// CHECK3-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float +// CHECK3-NEXT: store float [[CONV9]], float* [[SFVAR]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G]], double** [[TMP28]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP30:%.*]] = load double*, double** [[_TMP3]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store double* [[TMP30]], double** [[TMP29]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SVAR]], i32** [[TMP31]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[SFVAR]], float** [[TMP32]], align 4, !llvm.access.group !5 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !5 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK3-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK3-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -682,6 +698,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 @@ -693,21 +710,27 @@ // CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK9-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]], [2 x i32]* [[TMP0]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP3]], i32* [[CONV1]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S* [[TMP7]], %struct.S** [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP8]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -716,133 +739,135 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP8:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[SVAR9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP4:%.*]] = alloca %struct.S*, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: store %struct.S* [[TMP5]], %struct.S** [[_TMP1]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK9-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S* [[TMP11]], %struct.S** [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[T_VAR3]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK9-NEXT: [[TMP8:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP7]], i8* align 4 [[TMP8]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP9:%.*]] = bitcast [2 x %struct.S]* [[TMP2]] to %struct.S* -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP10]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: [[TMP14:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = bitcast [2 x %struct.S]* [[TMP6]] to %struct.S* +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP16]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP9]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP15]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[TMP11:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK9-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP17:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK9-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP17]], i8* align 4 [[TMP18]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP10]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done6: -// CHECK9-NEXT: [[TMP13:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[VAR7]] to i8* -// CHECK9-NEXT: [[TMP15:%.*]] = bitcast %struct.S* [[TMP13]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i64 4, i1 false) -// CHECK9-NEXT: store %struct.S* [[VAR7]], %struct.S** [[_TMP8]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[SVAR9]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP18]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP16]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done3: +// CHECK9-NEXT: [[TMP19:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK9-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[TMP19]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) +// CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP4]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], i32* [[SVAR]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP24]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP25]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP26]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] +// CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP26]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP25]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP27:%.*]] = load %struct.S*, %struct.S** [[_TMP8]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i64 0, i64 [[IDXPROM11]] -// CHECK9-NEXT: [[TMP29:%.*]] = bitcast %struct.S* [[ARRAYIDX12]] to i8* -// CHECK9-NEXT: [[TMP30:%.*]] = bitcast %struct.S* [[TMP27]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP29]], i8* align 4 [[TMP30]], i64 4, i1 false), !llvm.access.group !5 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP32]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP31]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP33:%.*]] = load %struct.S*, %struct.S** [[_TMP4]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK9-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* +// CHECK9-NEXT: [[TMP36:%.*]] = bitcast %struct.S* [[TMP33]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i64 4, i1 false), !llvm.access.group !5 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK9-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], 1 +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK9-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK9-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN14]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN9]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP36]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP42]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done15: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done10: // CHECK9-NEXT: ret void // // @@ -1012,6 +1037,7 @@ // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 @@ -1021,20 +1047,25 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK9-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[CONV]], [2 x i32]* [[TMP0]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP3]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [2 x %struct.S.0]* [[TMP1]], [2 x %struct.S.0]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S.0* [[TMP7]], %struct.S.0** [[TMP6]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1043,128 +1074,130 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP8:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[_TMP4:%.*]] = alloca %struct.S.0*, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[_TMP1]], align 8 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP7]], align 8 +// CHECK9-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S.0* [[TMP9]], %struct.S.0** [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[T_VAR3]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK9-NEXT: [[TMP7:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP6]], i8* align 4 [[TMP7]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP8:%.*]] = bitcast [2 x %struct.S.0]* [[TMP2]] to %struct.S.0* -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP9]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: [[TMP12:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S.0]* [[TMP6]] to %struct.S.0* +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP8]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[TMP10:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK9-NEXT: [[TMP11:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP10]], i8* align 4 [[TMP11]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK9-NEXT: [[TMP16:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP15]], i8* align 4 [[TMP16]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done6: -// CHECK9-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP1]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[VAR7]] to i8* -// CHECK9-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 4, i1 false) -// CHECK9-NEXT: store %struct.S.0* [[VAR7]], %struct.S.0** [[_TMP8]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done3: +// CHECK9-NEXT: [[TMP17:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP1]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = bitcast %struct.S.0* [[VAR]] to i8* +// CHECK9-NEXT: [[TMP19:%.*]] = bitcast %struct.S.0* [[TMP17]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false) +// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP4]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP22]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP23]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP25:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 8, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP26]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK9-NEXT: [[TMP27:%.*]] = bitcast %struct.S.0* [[ARRAYIDX11]] to i8* -// CHECK9-NEXT: [[TMP28:%.*]] = bitcast %struct.S.0* [[TMP25]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP27]], i8* align 4 [[TMP28]], i64 4, i1 false), !llvm.access.group !11 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP28]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP30:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP4]], align 8, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK9-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[ARRAYIDX7]] to i8* +// CHECK9-NEXT: [[TMP33:%.*]] = bitcast %struct.S.0* [[TMP30]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i64 4, i1 false), !llvm.access.group !11 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP29]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP34]], 1 +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP31]]) -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK9-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK9-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN13]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN9]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP34]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP39]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done14: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done10: // CHECK9-NEXT: ret void // // @@ -1358,6 +1391,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK11-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 @@ -1367,21 +1401,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 // CHECK11-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[T_VAR_ADDR]], [2 x i32]* [[TMP0]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP3]], i32* [[SVAR_ADDR]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[T_VAR_ADDR]], i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP7]], %struct.S** [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32* [[SVAR_ADDR]], i32** [[TMP8]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1390,131 +1430,133 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP8:%.*]] = alloca %struct.S*, align 4 -// CHECK11-NEXT: [[SVAR9:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP4:%.*]] = alloca %struct.S*, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: store %struct.S* [[TMP5]], %struct.S** [[_TMP1]], align 4 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP11]], %struct.S** [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[T_VAR3]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK11-NEXT: [[TMP8:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP7]], i8* align 4 [[TMP8]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP9:%.*]] = bitcast [2 x %struct.S]* [[TMP2]] to %struct.S* -// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP10]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK11-NEXT: [[TMP14:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP15:%.*]] = bitcast [2 x %struct.S]* [[TMP6]] to %struct.S* +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP16]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP9]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP15]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[TMP11:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK11-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP17:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK11-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP17]], i8* align 4 [[TMP18]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP10]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done6: -// CHECK11-NEXT: [[TMP13:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[VAR7]] to i8* -// CHECK11-NEXT: [[TMP15:%.*]] = bitcast %struct.S* [[TMP13]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i32 4, i1 false) -// CHECK11-NEXT: store %struct.S* [[VAR7]], %struct.S** [[_TMP8]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[SVAR9]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP18]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP16]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done3: +// CHECK11-NEXT: [[TMP19:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK11-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[TMP19]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false) +// CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP4]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[SVAR]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP24]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP25]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP26]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] +// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i32 0, i32 [[TMP26]] -// CHECK11-NEXT: store i32 [[TMP25]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP27:%.*]] = load %struct.S*, %struct.S** [[_TMP8]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 [[TMP28]] -// CHECK11-NEXT: [[TMP29:%.*]] = bitcast %struct.S* [[ARRAYIDX11]] to i8* -// CHECK11-NEXT: [[TMP30:%.*]] = bitcast %struct.S* [[TMP27]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP29]], i8* align 4 [[TMP30]], i32 4, i1 false), !llvm.access.group !6 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP32]] +// CHECK11-NEXT: store i32 [[TMP31]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP33:%.*]] = load %struct.S*, %struct.S** [[_TMP4]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[ARRAYIDX6]] to i8* +// CHECK11-NEXT: [[TMP36:%.*]] = bitcast %struct.S* [[TMP33]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i32 4, i1 false), !llvm.access.group !6 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK11-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], 1 +// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK11-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP36]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP42]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // @@ -1683,6 +1725,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 @@ -1691,20 +1734,25 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 // CHECK11-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[T_VAR_ADDR]], [2 x i32]* [[TMP0]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP3]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[T_VAR_ADDR]], i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x %struct.S.0]* [[TMP1]], [2 x %struct.S.0]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S.0* [[TMP7]], %struct.S.0** [[TMP6]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1713,126 +1761,128 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP8:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[_TMP4:%.*]] = alloca %struct.S.0*, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[_TMP1]], align 4 +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP7]], align 4 +// CHECK11-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S.0* [[TMP9]], %struct.S.0** [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[T_VAR3]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK11-NEXT: [[TMP7:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 [[TMP7]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP8:%.*]] = bitcast [2 x %struct.S.0]* [[TMP2]] to %struct.S.0* -// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP9]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK11-NEXT: [[TMP12:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S.0]* [[TMP6]] to %struct.S.0* +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP8]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[TMP10:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK11-NEXT: [[TMP11:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP10]], i8* align 4 [[TMP11]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK11-NEXT: [[TMP16:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP15]], i8* align 4 [[TMP16]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done6: -// CHECK11-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP1]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[VAR7]] to i8* -// CHECK11-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 4, i1 false) -// CHECK11-NEXT: store %struct.S.0* [[VAR7]], %struct.S.0** [[_TMP8]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done3: +// CHECK11-NEXT: [[TMP17:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP1]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = bitcast %struct.S.0* [[VAR]] to i8* +// CHECK11-NEXT: [[TMP19:%.*]] = bitcast %struct.S.0* [[TMP17]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false) +// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP4]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP22]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK11-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i32 0, i32 [[TMP24]] -// CHECK11-NEXT: store i32 [[TMP23]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP25:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = bitcast %struct.S.0* [[ARRAYIDX10]] to i8* -// CHECK11-NEXT: [[TMP28:%.*]] = bitcast %struct.S.0* [[TMP25]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP27]], i8* align 4 [[TMP28]], i32 4, i1 false), !llvm.access.group !12 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP29]] +// CHECK11-NEXT: store i32 [[TMP28]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP30:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP4]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP31]] +// CHECK11-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[ARRAYIDX6]] to i8* +// CHECK11-NEXT: [[TMP33:%.*]] = bitcast %struct.S.0* [[TMP30]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i32 4, i1 false), !llvm.access.group !12 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP29]], 1 -// CHECK11-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], 1 +// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP31]]) -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK11-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK11-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP34]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP39]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/distribute_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/distribute_simd_lastprivate_codegen.cpp --- a/clang/test/OpenMP/distribute_simd_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_simd_lastprivate_codegen.cpp @@ -171,6 +171,7 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], i64* [[SVAR_ADDR]], align 8 @@ -180,20 +181,25 @@ // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SFVAR_ADDR]] to float* // CHECK1-NEXT: store double* [[CONV1]], double** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, double*, double*, i32*, float*)* @.omp_outlined. to void (i32*, i32*, ...)*), double* [[CONV]], double* [[TMP0]], i32* [[CONV2]], float* [[CONV3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store double* [[CONV]], double** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[TMP2]], double** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[CONV2]], i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[CONV3]], float** [[TMP4]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[G:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -202,106 +208,108 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store double* [[G]], double** [[G_ADDR]], align 8 -// CHECK1-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store float* [[SFVAR]], float** [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float*, float** [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store double* [[TMP1]], double** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: store double* [[TMP4]], double** [[_TMP1]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 8 +// CHECK1-NEXT: store double* [[TMP4]], double** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[TMP9]], double** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load double*, double** [[_TMP1]], align 8 -// CHECK1-NEXT: store double* [[G14]], double** [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load double*, double** [[_TMP1]], align 8 +// CHECK1-NEXT: store double* [[G1]], double** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: store double 1.000000e+00, double* [[G3]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP14:%.*]] = load double*, double** [[_TMP5]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP14]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: store i32 3, i32* [[SVAR6]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: store float 4.000000e+00, float* [[SFVAR7]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double* [[G3]], double** [[TMP15]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP17:%.*]] = load double*, double** [[_TMP5]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: store double* [[TMP17]], double** [[TMP16]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store i32* [[SVAR6]], i32** [[TMP18]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store float* [[SFVAR7]], float** [[TMP19]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: store double 1.000000e+00, double* [[G]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP19:%.*]] = load double*, double** [[_TMP3]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP19]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: store i32 3, i32* [[SVAR]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: store float 4.000000e+00, float* [[SFVAR]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store double* [[G]], double** [[TMP20]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load double*, double** [[_TMP3]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: store double* [[TMP22]], double** [[TMP21]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[SVAR]], i32** [[TMP23]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[SFVAR]], float** [[TMP24]], align 8, !llvm.access.group !4 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK1-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK1-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP25:%.*]] = load double, double* [[G3]], align 8 -// CHECK1-NEXT: store volatile double [[TMP25]], double* [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load double*, double** [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load double, double* [[TMP26]], align 8 -// CHECK1-NEXT: store volatile double [[TMP27]], double* [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[SVAR6]], align 4 -// CHECK1-NEXT: store i32 [[TMP28]], i32* [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load float, float* [[SFVAR7]], align 4 -// CHECK1-NEXT: store float [[TMP29]], float* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load double, double* [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP30]], double* [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load double*, double** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load double, double* [[TMP31]], align 8 +// CHECK1-NEXT: store volatile double [[TMP32]], double* [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP33]], i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load float, float* [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP34]], float* [[TMP8]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -343,6 +351,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 // CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], i32* [[SVAR_ADDR]], align 4 @@ -357,20 +366,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load volatile double, double* [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], double* [[G13]], align 8 // CHECK3-NEXT: store double* [[G13]], double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, double*, double*, i32*, float*)* @.omp_outlined. to void (i32*, i32*, ...)*), double* [[G2]], double* [[TMP5]], i32* [[SVAR_ADDR]], float* [[CONV]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G2]], double** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load double*, double** [[_TMP4]], align 4 +// CHECK3-NEXT: store double* [[TMP7]], double** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SVAR_ADDR]], i32** [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[CONV]], float** [[TMP9]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca float*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -379,106 +393,108 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP5:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP3:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 -// CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store float* [[SFVAR]], float** [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load float*, float** [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store double* [[TMP1]], double** [[TMP]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP]], align 4 -// CHECK3-NEXT: store double* [[TMP4]], double** [[_TMP1]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 4 +// CHECK3-NEXT: store double* [[TMP4]], double** [[TMP]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load double*, double** [[TMP]], align 4 +// CHECK3-NEXT: store double* [[TMP9]], double** [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load double*, double** [[_TMP1]], align 4 -// CHECK3-NEXT: store double* [[G14]], double** [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load double*, double** [[_TMP1]], align 4 +// CHECK3-NEXT: store double* [[G1]], double** [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: store double 1.000000e+00, double* [[G3]], align 8, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP14:%.*]] = load double*, double** [[_TMP5]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP14]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: store i32 3, i32* [[SVAR6]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: store float 4.000000e+00, float* [[SFVAR7]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double* [[G3]], double** [[TMP15]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP17:%.*]] = load double*, double** [[_TMP5]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: store double* [[TMP17]], double** [[TMP16]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store i32* [[SVAR6]], i32** [[TMP18]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store float* [[SFVAR7]], float** [[TMP19]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store double 1.000000e+00, double* [[G]], align 8, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP19:%.*]] = load double*, double** [[_TMP3]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP19]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store i32 3, i32* [[SVAR]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store float 4.000000e+00, float* [[SFVAR]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G]], double** [[TMP20]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load double*, double** [[_TMP3]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store double* [[TMP22]], double** [[TMP21]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SVAR]], i32** [[TMP23]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[SFVAR]], float** [[TMP24]], align 4, !llvm.access.group !5 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !5 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK3-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP25:%.*]] = load double, double* [[G3]], align 8 -// CHECK3-NEXT: store volatile double [[TMP25]], double* [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP26:%.*]] = load double*, double** [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load double, double* [[TMP26]], align 4 -// CHECK3-NEXT: store volatile double [[TMP27]], double* [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[SVAR6]], align 4 -// CHECK3-NEXT: store i32 [[TMP28]], i32* [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load float, float* [[SFVAR7]], align 4 -// CHECK3-NEXT: store float [[TMP29]], float* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load double, double* [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP30]], double* [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP31:%.*]] = load double*, double** [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load double, double* [[TMP31]], align 4 +// CHECK3-NEXT: store volatile double [[TMP32]], double* [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP33]], i32* [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load float, float* [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP34]], float* [[TMP8]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -669,6 +685,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 @@ -680,21 +697,27 @@ // CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK9-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]], [2 x i32]* [[TMP0]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP3]], i32* [[CONV1]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S* [[TMP7]], %struct.S** [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP8]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -703,33 +726,35 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca %struct.S*, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: store %struct.S* [[TMP5]], %struct.S** [[_TMP1]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK9-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S* [[TMP11]], %struct.S** [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -739,115 +764,115 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP6:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store %struct.S* [[VAR6]], %struct.S** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK9-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[ARRAYIDX11]] to i8* -// CHECK9-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[TMP17]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i64 4, i1 false), !llvm.access.group !5 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP21]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP23:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: [[TMP25:%.*]] = bitcast %struct.S* [[ARRAYIDX6]] to i8* +// CHECK9-NEXT: [[TMP26:%.*]] = bitcast %struct.S* [[TMP23]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP25]], i8* align 4 [[TMP26]], i64 4, i1 false), !llvm.access.group !5 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK9-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP29]]) +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK9-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK9-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK9-NEXT: br i1 [[TMP33]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP28]], i32* [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK9-NEXT: [[TMP30:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP29]], i8* align 4 [[TMP30]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP31:%.*]] = bitcast [2 x %struct.S]* [[S_ARR5]] to %struct.S* -// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN13]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN13]], [[TMP32]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP34]], i32* [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP35:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK9-NEXT: [[TMP36:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP37:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN8]], [[TMP38]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP31]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN13]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK9-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP37]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[TMP39:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK9-NEXT: [[TMP40:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP39]], i8* align 4 [[TMP40]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP32]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done14: -// CHECK9-NEXT: [[TMP35:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP36:%.*]] = bitcast %struct.S* [[TMP6]] to i8* -// CHECK9-NEXT: [[TMP37:%.*]] = bitcast %struct.S* [[TMP35]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP36]], i8* align 4 [[TMP37]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK9-NEXT: store i32 [[TMP38]], i32* [[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP38]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP41:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP42:%.*]] = bitcast %struct.S* [[TMP12]] to i8* +// CHECK9-NEXT: [[TMP43:%.*]] = bitcast %struct.S* [[TMP41]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP42]], i8* align 4 [[TMP43]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP44]], i32* [[TMP10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN15:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN15]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP39]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP45]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN15]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE16:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done16: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -1017,6 +1042,7 @@ // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 @@ -1026,20 +1052,25 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK9-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[CONV]], [2 x i32]* [[TMP0]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP3]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [2 x %struct.S.0]* [[TMP1]], [2 x %struct.S.0]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S.0* [[TMP7]], %struct.S.0** [[TMP6]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1048,30 +1079,32 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca %struct.S.0*, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[_TMP1]], align 8 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP7]], align 8 +// CHECK9-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S.0* [[TMP9]], %struct.S.0** [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -1081,113 +1114,113 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP1]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store %struct.S.0* [[VAR6]], %struct.S.0** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP1]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP16:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i64 0, i64 [[IDXPROM9]] -// CHECK9-NEXT: [[TMP18:%.*]] = bitcast %struct.S.0* [[ARRAYIDX10]] to i8* -// CHECK9-NEXT: [[TMP19:%.*]] = bitcast %struct.S.0* [[TMP16]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false), !llvm.access.group !11 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP19]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP21:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 8, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[ARRAYIDX6]] to i8* +// CHECK9-NEXT: [[TMP24:%.*]] = bitcast %struct.S.0* [[TMP21]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP23]], i8* align 4 [[TMP24]], i64 4, i1 false), !llvm.access.group !11 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK9-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK9-NEXT: br i1 [[TMP31]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP27]], i32* [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK9-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP30:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR5]] to %struct.S.0* -// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN12]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN12]], [[TMP31]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP32]], i32* [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK9-NEXT: [[TMP34:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP35:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR]] to %struct.S.0* +// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN8]], [[TMP36]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP30]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK9-NEXT: [[TMP33:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP35]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[TMP37:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK9-NEXT: [[TMP38:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP37]], i8* align 4 [[TMP38]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP31]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done13: -// CHECK9-NEXT: [[TMP34:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP35:%.*]] = bitcast %struct.S.0* [[TMP5]] to i8* -// CHECK9-NEXT: [[TMP36:%.*]] = bitcast %struct.S.0* [[TMP34]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP39:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = bitcast %struct.S.0* [[TMP10]] to i8* +// CHECK9-NEXT: [[TMP41:%.*]] = bitcast %struct.S.0* [[TMP39]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP40]], i8* align 4 [[TMP41]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN14]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP37]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP42]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done15: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -1382,6 +1415,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK11-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 @@ -1391,21 +1425,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 // CHECK11-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[T_VAR_ADDR]], [2 x i32]* [[TMP0]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP3]], i32* [[SVAR_ADDR]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[T_VAR_ADDR]], i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP7]], %struct.S** [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32* [[SVAR_ADDR]], i32** [[TMP8]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1414,33 +1454,35 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP7:%.*]] = alloca %struct.S*, align 4 -// CHECK11-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca %struct.S*, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: store %struct.S* [[TMP5]], %struct.S** [[_TMP1]], align 4 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP11]], %struct.S** [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1450,113 +1492,113 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP6:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK11-NEXT: store %struct.S* [[VAR6]], %struct.S** [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i32 0, i32 [[TMP16]] -// CHECK11-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 [[TMP18]] -// CHECK11-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[ARRAYIDX10]] to i8* -// CHECK11-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[TMP17]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i32 4, i1 false), !llvm.access.group !6 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP22]] +// CHECK11-NEXT: store i32 [[TMP21]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP23:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP24]] +// CHECK11-NEXT: [[TMP25:%.*]] = bitcast %struct.S* [[ARRAYIDX5]] to i8* +// CHECK11-NEXT: [[TMP26:%.*]] = bitcast %struct.S* [[TMP23]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP25]], i8* align 4 [[TMP26]], i32 4, i1 false), !llvm.access.group !6 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK11-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK11-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP29]]) +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK11-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK11-NEXT: br i1 [[TMP33]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK11-NEXT: store i32 [[TMP28]], i32* [[TMP0]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK11-NEXT: [[TMP30:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP29]], i8* align 4 [[TMP30]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP31:%.*]] = bitcast [2 x %struct.S]* [[S_ARR5]] to %struct.S* -// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN12]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN12]], [[TMP32]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP34]], i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK11-NEXT: [[TMP36:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP37:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN7]], [[TMP38]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP31]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK11-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP37]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[TMP39:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK11-NEXT: [[TMP40:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP39]], i8* align 4 [[TMP40]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP32]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done13: -// CHECK11-NEXT: [[TMP35:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = bitcast %struct.S* [[TMP6]] to i8* -// CHECK11-NEXT: [[TMP37:%.*]] = bitcast %struct.S* [[TMP35]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 [[TMP37]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK11-NEXT: store i32 [[TMP38]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP38]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done8: +// CHECK11-NEXT: [[TMP41:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = bitcast %struct.S* [[TMP12]] to i8* +// CHECK11-NEXT: [[TMP43:%.*]] = bitcast %struct.S* [[TMP41]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP42]], i8* align 4 [[TMP43]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP44]], i32* [[TMP10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN14]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN9]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP39]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP45]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done15: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done10: // CHECK11-NEXT: ret void // // @@ -1725,6 +1767,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 @@ -1733,20 +1776,25 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 // CHECK11-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[T_VAR_ADDR]], [2 x i32]* [[TMP0]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP3]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[T_VAR_ADDR]], i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x %struct.S.0]* [[TMP1]], [2 x %struct.S.0]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S.0* [[TMP7]], %struct.S.0** [[TMP6]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1755,30 +1803,32 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca %struct.S.0*, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[_TMP1]], align 4 +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP7]], align 4 +// CHECK11-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S.0* [[TMP9]], %struct.S.0** [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1788,111 +1838,111 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP1]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK11-NEXT: store %struct.S.0* [[VAR6]], %struct.S.0** [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP1]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i32 0, i32 [[TMP15]] -// CHECK11-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP16:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 [[TMP17]] -// CHECK11-NEXT: [[TMP18:%.*]] = bitcast %struct.S.0* [[ARRAYIDX9]] to i8* -// CHECK11-NEXT: [[TMP19:%.*]] = bitcast %struct.S.0* [[TMP16]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false), !llvm.access.group !12 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP20]] +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP21:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP22]] +// CHECK11-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[ARRAYIDX5]] to i8* +// CHECK11-NEXT: [[TMP24:%.*]] = bitcast %struct.S.0* [[TMP21]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP23]], i8* align 4 [[TMP24]], i32 4, i1 false), !llvm.access.group !12 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK11-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK11-NEXT: store i32 [[TMP27]], i32* [[TMP0]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK11-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP30:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR5]] to %struct.S.0* -// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN11]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN11]], [[TMP31]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP32]], i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK11-NEXT: [[TMP34:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP35:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR]] to %struct.S.0* +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN7]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN7]], [[TMP36]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP30]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK11-NEXT: [[TMP33:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP35]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[TMP37:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK11-NEXT: [[TMP38:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP37]], i8* align 4 [[TMP38]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP31]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done12: -// CHECK11-NEXT: [[TMP34:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP35:%.*]] = bitcast %struct.S.0* [[TMP5]] to i8* -// CHECK11-NEXT: [[TMP36:%.*]] = bitcast %struct.S.0* [[TMP34]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done8: +// CHECK11-NEXT: [[TMP39:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = bitcast %struct.S.0* [[TMP10]] to i8* +// CHECK11-NEXT: [[TMP41:%.*]] = bitcast %struct.S.0* [[TMP39]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP40]], i8* align 4 [[TMP41]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN9]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP37]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP42]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done10: // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/distribute_simd_private_codegen.cpp b/clang/test/OpenMP/distribute_simd_private_codegen.cpp --- a/clang/test/OpenMP/distribute_simd_private_codegen.cpp +++ b/clang/test/OpenMP/distribute_simd_private_codegen.cpp @@ -131,15 +131,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK1-SAME: () #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca double*, align 8 @@ -156,69 +158,71 @@ // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store double* undef, double** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: store double* [[G1]], double** [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4 // CHECK1-NEXT: store double 1.000000e+00, double* [[G]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP8:%.*]] = load double*, double** [[_TMP2]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP8]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP9:%.*]] = load double*, double** [[_TMP2]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP9]], align 8, !llvm.access.group !4 // CHECK1-NEXT: store i32 3, i32* [[SVAR]], align 4, !llvm.access.group !4 // CHECK1-NEXT: store float 4.000000e+00, float* [[SFVAR]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double* [[G]], double** [[TMP9]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP11:%.*]] = load double*, double** [[_TMP2]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: store double* [[TMP11]], double** [[TMP10]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store i32* [[SVAR]], i32** [[TMP12]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store float* [[SFVAR]], float** [[TMP13]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store double* [[G]], double** [[TMP10]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load double*, double** [[_TMP2]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: store double* [[TMP12]], double** [[TMP11]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[SVAR]], i32** [[TMP13]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[SFVAR]], float** [[TMP14]], align 8, !llvm.access.group !4 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -249,15 +253,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK3-SAME: () #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca double*, align 4 @@ -274,69 +280,71 @@ // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store double* undef, double** [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: store double* [[G1]], double** [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5 // CHECK3-NEXT: store double 1.000000e+00, double* [[G]], align 8, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP8:%.*]] = load double*, double** [[_TMP2]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP8]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP9:%.*]] = load double*, double** [[_TMP2]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP9]], align 4, !llvm.access.group !5 // CHECK3-NEXT: store i32 3, i32* [[SVAR]], align 4, !llvm.access.group !5 // CHECK3-NEXT: store float 4.000000e+00, float* [[SFVAR]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double* [[G]], double** [[TMP9]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP11:%.*]] = load double*, double** [[_TMP2]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: store double* [[TMP11]], double** [[TMP10]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store i32* [[SVAR]], i32** [[TMP12]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store float* [[SFVAR]], float** [[TMP13]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G]], double** [[TMP10]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load double*, double** [[_TMP2]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store double* [[TMP12]], double** [[TMP11]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SVAR]], i32** [[TMP13]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[SFVAR]], float** [[TMP14]], align 4, !llvm.access.group !5 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !5 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -481,15 +489,17 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l93 // CHECK9-SAME: () #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S*, align 8 @@ -506,6 +516,8 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store %struct.S* undef, %struct.S** [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 @@ -523,74 +535,74 @@ // CHECK9: arrayctor.cont: // CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !6 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !6 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 -// CHECK9-NEXT: [[TMP10:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 8, !llvm.access.group !6 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 -// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 +// CHECK9-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 8, !llvm.access.group !6 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 +// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP12]] to i64 // CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM4]] -// CHECK9-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[ARRAYIDX5]] to i8* -// CHECK9-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[TMP10]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 4, i1 false), !llvm.access.group !6 +// CHECK9-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[ARRAYIDX5]] to i8* +// CHECK9-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[TMP11]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 4, i1 false), !llvm.access.group !6 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]]) -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK9-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK9-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i64 2 +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] @@ -613,77 +625,82 @@ // CHECK9-SAME: (i64 noundef [[I:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i64 [[I]], i64* [[I_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[I_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9: omp.inner.for.body: +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I1]], align 4, !llvm.access.group !12 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK9-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK9-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK9-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: store i32 2, i32* [[TMP0]], align 4 +// CHECK9-NEXT: store i32 2, i32* [[TMP2]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: ret void @@ -693,23 +710,23 @@ // CHECK9-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) -// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) -// CHECK9-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 8 -// CHECK9-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 8 +// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) +// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) +// CHECK9-NEXT: store %struct.S.1* [[TEST]], %struct.S.1** [[VAR]], align 8 +// CHECK9-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 8 // CHECK9-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 2) // CHECK9-NEXT: [[TMP1:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 1) // CHECK9-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 @@ -719,17 +736,17 @@ // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: // CHECK9-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK9: arraydestroy.done2: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK9-NEXT: ret i32 [[TMP4]] // @@ -769,189 +786,193 @@ // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK9-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) +// CHECK9-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP2:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK9-NEXT: [[_TMP2:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 8 +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: -// CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[TMP10:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 8, !llvm.access.group !15 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM4]] -// CHECK9-NEXT: [[TMP12:%.*]] = bitcast %struct.S.0* [[ARRAYIDX5]] to i8* -// CHECK9-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[TMP10]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 4, i1 false), !llvm.access.group !15 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP11:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP2]], align 8, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK9-NEXT: [[TMP13:%.*]] = bitcast %struct.S.1* [[ARRAYIDX5]] to i8* +// CHECK9-NEXT: [[TMP14:%.*]] = bitcast %struct.S.1* [[TMP11]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 4, i1 false), !llvm.access.group !15 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]]) -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK9-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK9-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN7]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN7]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK9: arraydestroy.done8: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: store i32 0, i32* [[F]], align 4 // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1065,15 +1086,17 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l93 // CHECK11-SAME: () #[[ATTR3:[0-9]+]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S*, align 4 @@ -1090,6 +1113,8 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: store %struct.S* undef, %struct.S** [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 @@ -1107,72 +1132,72 @@ // CHECK11: arrayctor.cont: // CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !7 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !7 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !7 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP9]] -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !7 -// CHECK11-NEXT: [[TMP10:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 4, !llvm.access.group !7 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !7 -// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* -// CHECK11-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[TMP10]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 4, i1 false), !llvm.access.group !7 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !7 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !7 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP10]] +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !7 +// CHECK11-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 4, !llvm.access.group !7 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !7 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK11-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* +// CHECK11-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[TMP11]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 4, i1 false), !llvm.access.group !7 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]]) -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK11-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -1195,76 +1220,81 @@ // CHECK11-SAME: (i32 noundef [[I:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32 [[I]], i32* [[I_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[I_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[I_ADDR]], i32** [[TMP0]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I1:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK11: omp.inner.for.body: // CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11: omp.inner.for.body: +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I1]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK11-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK11-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: store i32 2, i32* [[TMP0]], align 4 +// CHECK11-NEXT: store i32 2, i32* [[TMP2]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: ret void @@ -1274,23 +1304,23 @@ // CHECK11-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i32 8, i1 false) -// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i32 1 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) -// CHECK11-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 4 -// CHECK11-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 4 +// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i32 1 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK11-NEXT: store %struct.S.1* [[TEST]], %struct.S.1** [[VAR]], align 4 +// CHECK11-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 4 // CHECK11-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 2) // CHECK11-NEXT: [[TMP1:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 1) // CHECK11-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 @@ -1300,17 +1330,17 @@ // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: // CHECK11-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK11: arraydestroy.done2: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK11-NEXT: ret i32 [[TMP4]] // @@ -1350,187 +1380,191 @@ // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK11-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49 // CHECK11-SAME: () #[[ATTR3]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP2:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 4 +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: -// CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i32 1 -// CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !16 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !16 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !16 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP9]] -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 -// CHECK11-NEXT: [[TMP10:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4, !llvm.access.group !16 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !16 -// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: [[TMP12:%.*]] = bitcast %struct.S.0* [[ARRAYIDX4]] to i8* -// CHECK11-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[TMP10]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 4, i1 false), !llvm.access.group !16 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !16 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !16 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP10]] +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 +// CHECK11-NEXT: [[TMP11:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP2]], align 4, !llvm.access.group !16 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !16 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK11-NEXT: [[TMP13:%.*]] = bitcast %struct.S.1* [[ARRAYIDX4]] to i8* +// CHECK11-NEXT: [[TMP14:%.*]] = bitcast %struct.S.1* [[TMP11]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 4, i1 false), !llvm.access.group !16 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]]) -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK11-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK11: arraydestroy.done7: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: store i32 0, i32* [[F]], align 4 // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/distribute_simd_reduction_codegen.cpp b/clang/test/OpenMP/distribute_simd_reduction_codegen.cpp --- a/clang/test/OpenMP/distribute_simd_reduction_codegen.cpp +++ b/clang/test/OpenMP/distribute_simd_reduction_codegen.cpp @@ -122,89 +122,94 @@ // CHECK1-SAME: (i64 noundef [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 0, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[SIVAR1]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[SIVAR1]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[SIVAR]], align 4, !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -250,89 +255,94 @@ // CHECK1-SAME: (i64 noundef [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR1]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[T_VAR1]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[T_VAR]], align 4, !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -382,88 +392,93 @@ // CHECK3-SAME: (i32 noundef [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[SIVAR]], i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[SIVAR_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[SIVAR_ADDR]], i32** [[TMP0]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: store i32 0, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[SIVAR1]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[SIVAR1]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[SIVAR]], align 4, !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[TMP2]], align 4 // CHECK3-NEXT: ret void // // @@ -508,88 +523,93 @@ // CHECK3-SAME: (i32 noundef [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[T_VAR_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[T_VAR_ADDR]], i32** [[TMP0]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: store i32 0, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR1]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[T_VAR1]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[T_VAR]], align 4, !llvm.access.group !12 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[TMP2]], align 4 // CHECK3-NEXT: ret void // // @@ -814,93 +834,98 @@ // CHECK9-SAME: (i64 noundef [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: store i32 0, i32* [[SIVAR1]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK9-NEXT: store i32 0, i32* [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9: omp.inner.for.body: +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[SIVAR1]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[SIVAR1]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store i32* [[SIVAR1]], i32** [[TMP11]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK9-NEXT: store i32 [[ADD2]], i32* [[SIVAR]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[TMP13]], align 8, !llvm.access.group !4 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]), !llvm.access.group !4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK9-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: store i32 [[ADD4]], i32* [[TMP2]], align 4 // CHECK9-NEXT: ret void // // diff --git a/clang/test/OpenMP/for_firstprivate_codegen.cpp b/clang/test/OpenMP/for_firstprivate_codegen.cpp --- a/clang/test/OpenMP/for_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/for_firstprivate_codegen.cpp @@ -421,6 +421,7 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* @@ -430,22 +431,30 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK1-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[T_VAR]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP1]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR]], align 8 +// CHECK1-NEXT: store %struct.S.0* [[TMP5]], %struct.S.0** [[TMP4]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP3]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP7]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2StC2Ev @@ -514,14 +523,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR7:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR7:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -530,126 +536,128 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP8:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP9:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP4:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP5:%.*]] = alloca %struct.S.0*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[_TMP1]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP7]], align 8 +// CHECK1-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.S.0* [[TMP9]], %struct.S.0** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP6]], i8* align 4 [[TMP7]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast [2 x %struct.S.0]* [[TMP2]] to %struct.S.0* -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP9]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S.0]* [[TMP6]] to %struct.S.0* +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP8]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done6: -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP1]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP8]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR7]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP10]], %struct.St* noundef [[AGG_TMP8]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP8]]) #[[ATTR2]] -// CHECK1-NEXT: store %struct.S.0* [[VAR7]], %struct.S.0** [[_TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done3: +// CHECK1-NEXT: [[TMP15:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP1]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP15]], %struct.St* noundef [[AGG_TMP4]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) #[[ATTR2]] +// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP9]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i64 0, i64 [[IDXPROM11]] -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[ARRAYIDX12]] to i8* -// CHECK1-NEXT: [[TMP24:%.*]] = bitcast %struct.S.0* [[TMP21]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP23]], i8* align 4 [[TMP24]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP24]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM7]] +// CHECK1-NEXT: [[TMP28:%.*]] = bitcast %struct.S.0* [[ARRAYIDX8]] to i8* +// CHECK1-NEXT: [[TMP29:%.*]] = bitcast %struct.S.0* [[TMP26]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP25]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN14]], i64 2 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN10]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP28]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP33]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done15: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done11: +// CHECK1-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP35]]) // CHECK1-NEXT: ret void // // @@ -867,11 +875,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -882,78 +890,80 @@ // CHECK3-NEXT: [[G:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[G1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[SIVAR3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** @g1, align 8 -// CHECK3-NEXT: store i32* [[TMP1]], i32** [[TMP]], align 8 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** @g1, align 8 +// CHECK3-NEXT: store i32* [[TMP3]], i32** [[TMP]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load volatile i32, i32* @g, align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[G]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load volatile i32, i32* @g, align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[G1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load volatile i32, i32* @g, align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[G]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load volatile i32, i32* @g, align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[G1]], align 4 // CHECK3-NEXT: store i32* [[G1]], i32** [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[SIVAR3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK3-NEXT: store i32 1, i32* [[G]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP2]], align 8 -// CHECK3-NEXT: store volatile i32 2, i32* [[TMP13]], align 4 -// CHECK3-NEXT: store i32 3, i32* [[SIVAR3]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store i32* [[G]], i32** [[TMP14]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[_TMP2]], align 8 -// CHECK3-NEXT: store i32* [[TMP16]], i32** [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store i32* [[SIVAR3]], i32** [[TMP17]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK3-NEXT: store volatile i32 2, i32* [[TMP15]], align 4 +// CHECK3-NEXT: store i32 3, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[G]], i32** [[TMP16]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK3-NEXT: store i32* [[TMP18]], i32** [[TMP17]], align 8 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[TMP19]], align 8 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]]) // CHECK3-NEXT: ret void // // @@ -1113,19 +1123,22 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>*, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK4-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* // CHECK4-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>** [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* @_ZZ4mainE5sivar) +// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store i32* @_ZZ4mainE5sivar, i32** [[TMP0]], align 8 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1136,57 +1149,59 @@ // CHECK4-NEXT: [[G:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[G1:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[SIVAR3:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, i32 }>, align 8 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** @g1, align 8 -// CHECK4-NEXT: store i32* [[TMP1]], i32** [[TMP]], align 8 +// CHECK4-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32*, i32** @g1, align 8 +// CHECK4-NEXT: store i32* [[TMP3]], i32** [[TMP]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load volatile i32, i32* @g, align 4 -// CHECK4-NEXT: store i32 [[TMP2]], i32* [[G]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load volatile i32, i32* @g, align 4 -// CHECK4-NEXT: store i32 [[TMP3]], i32* [[G1]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = load volatile i32, i32* @g, align 4 +// CHECK4-NEXT: store i32 [[TMP4]], i32* [[G]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load volatile i32, i32* @g, align 4 +// CHECK4-NEXT: store i32 [[TMP5]], i32* [[G1]], align 4 // CHECK4-NEXT: store i32* [[G1]], i32** [[_TMP2]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK4-NEXT: store i32 [[TMP4]], i32* [[SIVAR3]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], i32* [[SIVAR]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK4-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK4: omp.inner.for.body: // CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4: omp.inner.for.body: +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK4-NEXT: store i32 1, i32* [[G]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP2]], align 8 -// CHECK4-NEXT: store volatile i32 1, i32* [[TMP13]], align 4 -// CHECK4-NEXT: store i32 2, i32* [[SIVAR3]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK4-NEXT: store volatile i32 1, i32* [[TMP15]], align 4 +// CHECK4-NEXT: store i32 2, i32* [[SIVAR]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, i32 }>* [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** [[BLOCK_ISA]], align 8 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, i32 }>* [[BLOCK]], i32 0, i32 1 @@ -1198,34 +1213,34 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, i32 }>* [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, i32 }>* [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP14:%.*]] = load volatile i32, i32* [[G]], align 4 -// CHECK4-NEXT: store volatile i32 [[TMP14]], i32* [[BLOCK_CAPTURED]], align 8 -// CHECK4-NEXT: [[BLOCK_CAPTURED5:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, i32 }>* [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32*, i32** [[_TMP2]], align 8 -// CHECK4-NEXT: store i32* [[TMP15]], i32** [[BLOCK_CAPTURED5]], align 8 -// CHECK4-NEXT: [[BLOCK_CAPTURED6:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, i32 }>* [[BLOCK]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR3]], align 4 -// CHECK4-NEXT: store i32 [[TMP16]], i32* [[BLOCK_CAPTURED6]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, i32 }>* [[BLOCK]] to void ()* -// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP17]] to %struct.__block_literal_generic* -// CHECK4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP19:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* -// CHECK4-NEXT: [[TMP20:%.*]] = load i8*, i8** [[TMP18]], align 8 -// CHECK4-NEXT: [[TMP21:%.*]] = bitcast i8* [[TMP20]] to void (i8*)* -// CHECK4-NEXT: call void [[TMP21]](i8* noundef [[TMP19]]) +// CHECK4-NEXT: [[TMP16:%.*]] = load volatile i32, i32* [[G]], align 4 +// CHECK4-NEXT: store volatile i32 [[TMP16]], i32* [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[BLOCK_CAPTURED4:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, i32 }>* [[BLOCK]], i32 0, i32 5 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK4-NEXT: store i32* [[TMP17]], i32** [[BLOCK_CAPTURED4]], align 8 +// CHECK4-NEXT: [[BLOCK_CAPTURED5:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, i32 }>* [[BLOCK]], i32 0, i32 7 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP18]], i32* [[BLOCK_CAPTURED5]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, i32 }>* [[BLOCK]] to void ()* +// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP19]] to %struct.__block_literal_generic* +// CHECK4-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP21:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* +// CHECK4-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP20]], align 8 +// CHECK4-NEXT: [[TMP23:%.*]] = bitcast i8* [[TMP22]] to void (i8*)* +// CHECK4-NEXT: call void [[TMP23]](i8* noundef [[TMP21]]) // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK4-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK4-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]]) // CHECK4-NEXT: ret void // // diff --git a/clang/test/OpenMP/for_lastprivate_codegen.cpp b/clang/test/OpenMP/for_lastprivate_codegen.cpp --- a/clang/test/OpenMP/for_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/for_lastprivate_codegen.cpp @@ -369,6 +369,10 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_3:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN2SSC1ERi(%struct.SS* noundef nonnull align 8 dereferenceable(24) [[SS]], i32* noundef nonnull align 4 dereferenceable(4) @_ZZ4mainE5sivar) // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) @@ -380,26 +384,36 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00) // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], float noundef 3.000000e+00) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[T_VAR]], [2 x i32]* [[VEC]], [2 x %struct.S]* [[S_ARR]], %struct.S* [[VAR]], i32* @_ZZ4mainE5sivar) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store i32* @_ZZ4mainE5sivar, i32** [[TMP5]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_2]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_3]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK1-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done1: +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done4: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP2]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP7]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2SSC1ERi @@ -439,44 +453,42 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[SIVAR5:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: @@ -486,105 +498,105 @@ // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP13]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i64 0, i64 [[IDXPROM7]] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX8]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[SIVAR5]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[TMP16]] -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[SIVAR5]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM2]] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX3]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK1-NEXT: store i32 [[TMP23]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK1-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP24]], i8* align 4 [[TMP25]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP26:%.*]] = bitcast [2 x %struct.S]* [[S_ARR3]] to %struct.S* -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN11]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN11]], [[TMP27]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK1-NEXT: [[TMP31:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP32:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN6]], [[TMP33]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP26]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[CALL12:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP32]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[CALL7:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done13: -// CHECK1-NEXT: [[CALL14:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP3]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[SIVAR5]], align 4 -// CHECK1-NEXT: store i32 [[TMP28]], i32* [[TMP4]], align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP33]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done8: +// CHECK1-NEXT: [[CALL9:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP8]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP34]], i32* [[TMP10]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAY_BEGIN15:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN15]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK1-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP35]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN15]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE16:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done16: -// CHECK1-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP31]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done11: +// CHECK1-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP36]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP37]]) // CHECK1-NEXT: ret void // // @@ -599,10 +611,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -614,74 +627,77 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load float, float* @f, align 4 -// CHECK1-NEXT: store float [[TMP0]], float* [[F]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load float, float* @f, align 4 +// CHECK1-NEXT: store float [[TMP1]], float* [[F]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load double, double* [[X]], align 8 -// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP9]], 1.000000e+00 +// CHECK1-NEXT: [[TMP10:%.*]] = load double, double* [[X]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP10]], 1.000000e+00 // CHECK1-NEXT: store double [[INC]], double* [[X]], align 8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP13:%.*]] = load double, double* [[X]], align 8 -// CHECK1-NEXT: store double [[TMP13]], double* @_ZN1A1xE, align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load float, float* [[F]], align 4 -// CHECK1-NEXT: store float [[TMP14]], float* @f, align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load double, double* [[X]], align 8 +// CHECK1-NEXT: store double [[TMP14]], double* @_ZN1A1xE, align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load float, float* [[F]], align 4 +// CHECK1-NEXT: store float [[TMP15]], float* @f, align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -691,78 +707,81 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[DOTF__VOID_ADDR:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP1]], i64 4, i8* inttoptr (i64 3 to i8*)) +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: [[DOTF__VOID_ADDR:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP2]], i64 4, i8* inttoptr (i64 3 to i8*)) // CHECK1-NEXT: [[DOTF__ADDR:%.*]] = bitcast i8* [[DOTF__VOID_ADDR]] to float* -// CHECK1-NEXT: [[TMP2:%.*]] = load float, float* @f, align 4 -// CHECK1-NEXT: store float [[TMP2]], float* [[DOTF__ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load float, float* @f, align 4 +// CHECK1-NEXT: store float [[TMP3]], float* [[DOTF__ADDR]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load double, double* @_ZN1A1xE, align 8 -// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP9]], 1.000000e+00 +// CHECK1-NEXT: [[TMP10:%.*]] = load double, double* @_ZN1A1xE, align 8 +// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP10]], 1.000000e+00 // CHECK1-NEXT: store double [[INC]], double* @_ZN1A1xE, align 8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP13:%.*]] = load float, float* [[DOTF__ADDR]], align 4 -// CHECK1-NEXT: store float [[TMP13]], float* @f, align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load float, float* [[DOTF__ADDR]], align 4 +// CHECK1-NEXT: store float [[TMP14]], float* @f, align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast float* [[DOTF__ADDR]] to i8* -// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP1]], i8* [[TMP14]], i8* inttoptr (i64 3 to i8*)) -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast float* [[DOTF__ADDR]] to i8* +// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP2]], i8* [[TMP15]], i8* inttoptr (i64 3 to i8*)) +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -773,70 +792,72 @@ // CHECK1-NEXT: [[CNT:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[DOTCNT__VOID_ADDR:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP1]], i64 1, i8* inttoptr (i64 3 to i8*)) -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: [[DOTCNT__VOID_ADDR:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP2]], i64 1, i8* inttoptr (i64 3 to i8*)) +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i8 // CHECK1-NEXT: store i8 [[CONV]], i8* [[DOTCNT__VOID_ADDR]], align 1 -// CHECK1-NEXT: [[TMP8:%.*]] = load double, double* @_ZN1A1xE, align 8 -// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP8]], 1.000000e+00 +// CHECK1-NEXT: [[TMP9:%.*]] = load double, double* @_ZN1A1xE, align 8 +// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP9]], 1.000000e+00 // CHECK1-NEXT: store double [[INC]], double* @_ZN1A1xE, align 8 // CHECK1-NEXT: store float 0.000000e+00, float* [[F]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: // CHECK1-NEXT: store i8 2, i8* [[DOTCNT__VOID_ADDR]], align 1 -// CHECK1-NEXT: [[TMP12:%.*]] = load i8, i8* [[DOTCNT__VOID_ADDR]], align 1 -// CHECK1-NEXT: store i8 [[TMP12]], i8* @cnt, align 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load float, float* [[F]], align 4 -// CHECK1-NEXT: store float [[TMP13]], float* @f, align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i8, i8* [[DOTCNT__VOID_ADDR]], align 1 +// CHECK1-NEXT: store i8 [[TMP13]], i8* @cnt, align 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load float, float* [[F]], align 4 +// CHECK1-NEXT: store float [[TMP14]], float* @f, align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: -// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP1]], i8* [[DOTCNT__VOID_ADDR]], i8* inttoptr (i64 3 to i8*)) -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP2]], i8* [[DOTCNT__VOID_ADDR]], i8* inttoptr (i64 3 to i8*)) +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // @@ -844,38 +865,47 @@ // CHECK1-SAME: () #[[ATTR7:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_4:%.*]], align 4 // CHECK1-NEXT: [[SST:%.*]] = alloca [[STRUCT_SST:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK1-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 128 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.4], align 128 +// CHECK1-NEXT: [[VAR:%.*]] = alloca %struct.S.4*, align 128 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: call void @_ZN3SSTIiEC1Ev(%struct.SST* noundef nonnull align 4 dereferenceable(4) [[SST]]) // CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 128 // CHECK1-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP0]], i8* align 128 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) -// CHECK1-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 128 -// CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR]], align 128 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* [[T_VAR]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP1]]) +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.4], [2 x %struct.S.4]* [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_4]], %struct.S.4* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK1-NEXT: store %struct.S.4* [[TEST]], %struct.S.4** [[VAR]], align 128 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x %struct.S.4]* [[S_ARR]], [2 x %struct.S.4]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP5:%.*]] = load %struct.S.4*, %struct.S.4** [[VAR]], align 128 +// CHECK1-NEXT: store %struct.S.4* [[TMP5]], %struct.S.4** [[TMP4]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.4], [2 x %struct.S.4]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_4]], %struct.S.4* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.4* [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_4]], %struct.S.4* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.4* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP3]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP7]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2SSC2ERi @@ -883,6 +913,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 @@ -906,7 +937,9 @@ // CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 3 // CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[D_ADDR]], align 8 // CHECK1-NEXT: store i32* [[TMP1]], i32** [[C]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.SS* [[THIS1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[THIS1]], %struct.SS** [[TMP2]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32* [[TMP]], i32** [[_TMP2]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 @@ -914,44 +947,44 @@ // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: store i32* [[A3]], i32** [[_TMP4]], align 8 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[_TMP4]], align 8 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP9]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP10]], align 4 // CHECK1-NEXT: [[B6:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 2 // CHECK1-NEXT: [[BF_LOAD7:%.*]] = load i8, i8* [[B6]], align 8 // CHECK1-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD7]], 4 // CHECK1-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 4 // CHECK1-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32 // CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[BF_CAST]], -1 -// CHECK1-NEXT: [[TMP11:%.*]] = trunc i32 [[DEC]] to i8 +// CHECK1-NEXT: [[TMP12:%.*]] = trunc i32 [[DEC]] to i8 // CHECK1-NEXT: [[BF_LOAD8:%.*]] = load i8, i8* [[B6]], align 8 -// CHECK1-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP11]], 15 +// CHECK1-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP12]], 15 // CHECK1-NEXT: [[BF_CLEAR9:%.*]] = and i8 [[BF_LOAD8]], -16 // CHECK1-NEXT: [[BF_SET10:%.*]] = or i8 [[BF_CLEAR9]], [[BF_VALUE]] // CHECK1-NEXT: store i8 [[BF_SET10]], i8* [[B6]], align 8 @@ -959,16 +992,16 @@ // CHECK1-NEXT: [[BF_RESULT_ASHR:%.*]] = ashr i8 [[BF_RESULT_SHL]], 4 // CHECK1-NEXT: [[BF_RESULT_CAST:%.*]] = sext i8 [[BF_RESULT_ASHR]] to i32 // CHECK1-NEXT: [[C11:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[C11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[DIV]], i32* [[TMP12]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[C11]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP14]], 1 +// CHECK1-NEXT: store i32 [[DIV]], i32* [[TMP13]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: @@ -980,11 +1013,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[E:%.*]] = alloca [4 x i8]*, align 8 // CHECK1-NEXT: [[A:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -1008,112 +1041,114 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[E1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: [[E1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 1 // CHECK1-NEXT: store [4 x i8]* [[E1]], [4 x i8]** [[E]], align 8 -// CHECK1-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store i32* [[A2]], i32** [[A]], align 8 -// CHECK1-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP0]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[C3]], align 8 -// CHECK1-NEXT: store i32* [[TMP1]], i32** [[C]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A]], align 8 -// CHECK1-NEXT: store i32* [[TMP2]], i32** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[C]], align 8 -// CHECK1-NEXT: store i32* [[TMP3]], i32** [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load [4 x i8]*, [4 x i8]** [[E]], align 8 -// CHECK1-NEXT: store [4 x i8]* [[TMP4]], [4 x i8]** [[_TMP5]], align 8 +// CHECK1-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP2]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[C3]], align 8 +// CHECK1-NEXT: store i32* [[TMP3]], i32** [[C]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A]], align 8 +// CHECK1-NEXT: store i32* [[TMP4]], i32** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[C]], align 8 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load [4 x i8]*, [4 x i8]** [[E]], align 8 +// CHECK1-NEXT: store [4 x i8]* [[TMP6]], [4 x i8]** [[_TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load [4 x i8]*, [4 x i8]** [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast [4 x i8]* [[E7]] to i8* -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast [4 x i8]* [[TMP5]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[TMP6]], i8* align 1 [[TMP7]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP7:%.*]] = load [4 x i8]*, [4 x i8]** [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast [4 x i8]* [[E7]] to i8* +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [4 x i8]* [[TMP7]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[TMP8]], i8* align 1 [[TMP9]], i64 4, i1 false) // CHECK1-NEXT: store [4 x i8]* [[E7]], [4 x i8]** [[_TMP8]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]]) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP]], align 8 // CHECK1-NEXT: store i32* [[A9]], i32** [[_TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP4]], align 8 // CHECK1-NEXT: store i32* [[C12]], i32** [[_TMP13]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load [4 x i8]*, [4 x i8]** [[_TMP5]], align 8 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load [4 x i8]*, [4 x i8]** [[_TMP5]], align 8 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP14:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP14:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[_TMP10]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP19]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[B11]], align 4 -// CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP21]], -1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32*, i32** [[_TMP10]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP21]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[B11]], align 4 +// CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP23]], -1 // CHECK1-NEXT: store i32 [[DEC]], i32* [[B11]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32*, i32** [[_TMP13]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP23]], 1 -// CHECK1-NEXT: store i32 [[DIV]], i32* [[TMP22]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32*, i32** [[_TMP13]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP25]], 1 +// CHECK1-NEXT: store i32 [[DIV]], i32* [[TMP24]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP26]], 1 // CHECK1-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[_TMP10]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK1-NEXT: store i32 [[TMP28]], i32* [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[B11]], align 4 -// CHECK1-NEXT: store i32 [[TMP29]], i32* [[B]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32*, i32** [[_TMP13]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 -// CHECK1-NEXT: store i32 [[TMP31]], i32* [[TMP11]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = load [4 x i8]*, [4 x i8]** [[_TMP8]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = load [4 x i8], [4 x i8]* [[TMP32]], align 1 -// CHECK1-NEXT: store [4 x i8] [[TMP33]], [4 x i8]* [[TMP12]], align 1 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[B11]], align 4 -// CHECK1-NEXT: [[B16:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP0]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP35:%.*]] = trunc i32 [[TMP34]] to i8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32*, i32** [[_TMP10]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK1-NEXT: store i32 [[TMP30]], i32* [[TMP12]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[B11]], align 4 +// CHECK1-NEXT: store i32 [[TMP31]], i32* [[B]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32*, i32** [[_TMP13]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK1-NEXT: store i32 [[TMP33]], i32* [[TMP13]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load [4 x i8]*, [4 x i8]** [[_TMP8]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load [4 x i8], [4 x i8]* [[TMP34]], align 1 +// CHECK1-NEXT: store [4 x i8] [[TMP35]], [4 x i8]* [[TMP14]], align 1 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[B11]], align 4 +// CHECK1-NEXT: [[B16:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP2]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP37:%.*]] = trunc i32 [[TMP36]] to i8 // CHECK1-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[B16]], align 8 -// CHECK1-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP35]], 15 +// CHECK1-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP37]], 15 // CHECK1-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -16 // CHECK1-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], [[BF_VALUE]] // CHECK1-NEXT: store i8 [[BF_SET]], i8* [[B16]], align 8 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]]) +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]]) // CHECK1-NEXT: ret void // // @@ -1152,12 +1187,12 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK1-SAME: (%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.4*, align 8 +// CHECK1-NEXT: store %struct.S.4* [[THIS]], %struct.S.4** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.4*, %struct.S.4** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK1-NEXT: ret void // // @@ -1172,185 +1207,184 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK1-SAME: (%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.4*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.4* [[THIS]], %struct.S.4** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.4*, %struct.S.4** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 +// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.4*, align 8 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.4*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR3:%.*]] = alloca i32, align 128 -// CHECK1-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 128 -// CHECK1-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK1-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 -// CHECK1-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.4], align 128 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_4:%.*]], align 128 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca %struct.S.4*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[_TMP1]], align 8 +// CHECK1-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.4]*, [2 x %struct.S.4]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S.4*, %struct.S.4** [[TMP7]], align 8 +// CHECK1-NEXT: store %struct.S.4* [[TMP8]], %struct.S.4** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load %struct.S.4*, %struct.S.4** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.S.4* [[TMP9]], %struct.S.4** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.4], [2 x %struct.S.4]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_4]], %struct.S.4* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: -// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.4* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_4]], %struct.S.4* [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.4* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP1]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK1-NEXT: store %struct.S.0* [[VAR6]], %struct.S.0** [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.S.4*, %struct.S.4** [[_TMP1]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: store %struct.S.4* [[VAR]], %struct.S.4** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR3]], align 128 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i64 0, i64 [[IDXPROM9]] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEaSERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX10]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP16]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[T_VAR]], align 128 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load %struct.S.4*, %struct.S.4** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.4], [2 x %struct.S.4]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.4* @_ZN1SIiEaSERKS0_(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX6]], %struct.S.4* noundef nonnull align 4 dereferenceable(4) [[TMP21]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK1-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR3]], align 128 -// CHECK1-NEXT: store i32 [[TMP23]], i32* [[TMP0]], align 128 -// CHECK1-NEXT: [[TMP24:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK1-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP24]], i8* align 128 [[TMP25]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP26:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR5]] to %struct.S.0* -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN12]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN12]], [[TMP27]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR]], align 128 +// CHECK1-NEXT: store i32 [[TMP28]], i32* [[TMP2]], align 128 +// CHECK1-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK1-NEXT: [[TMP30:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP29]], i8* align 128 [[TMP30]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.4], [2 x %struct.S.4]* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP31:%.*]] = bitcast [2 x %struct.S.4]* [[S_ARR]] to %struct.S.4* +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr [[STRUCT_S_4]], %struct.S.4* [[ARRAY_BEGIN8]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.4* [[ARRAY_BEGIN8]], [[TMP32]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP26]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[CALL13:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEaSERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done14: -// CHECK1-NEXT: [[TMP28:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK1-NEXT: [[CALL15:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEaSERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP5]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP28]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.4* [ [[TMP31]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.4* [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[CALL9:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.4* @_ZN1SIiEaSERKS0_(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.4* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_4]], %struct.S.4* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_4]], %struct.S.4* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.4* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP32]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done10: +// CHECK1-NEXT: [[TMP33:%.*]] = load %struct.S.4*, %struct.S.4** [[_TMP3]], align 8 +// CHECK1-NEXT: [[CALL11:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.4* @_ZN1SIiEaSERKS0_(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[TMP10]], %struct.S.4* noundef nonnull align 4 dereferenceable(4) [[TMP33]]) // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAY_BEGIN16:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN16]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK1-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.4], [2 x %struct.S.4]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_4]], %struct.S.4* [[ARRAY_BEGIN12]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN16]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE17:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done17: -// CHECK1-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP31]]) +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.4* [ [[TMP34]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_4]], %struct.S.4* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.4* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done13: +// CHECK1-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP36]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK1-SAME: (%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR5]] +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.4*, align 8 +// CHECK1-NEXT: store %struct.S.4* [[THIS]], %struct.S.4** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.4*, %struct.S.4** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR5]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK1-SAME: (%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.4*, align 8 +// CHECK1-NEXT: store %struct.S.4* [[THIS]], %struct.S.4** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.4*, %struct.S.4** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_4:%.*]], %struct.S.4* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, i32* [[F]], align 4 // CHECK1-NEXT: ret void // @@ -1359,6 +1393,7 @@ // CHECK1-SAME: (%struct.SST* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 @@ -1373,7 +1408,9 @@ // CHECK1-NEXT: [[THIS1:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], %struct.SST* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, i32* [[A]], align 4 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SST*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.SST* [[THIS1]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SST* [[THIS1]], %struct.SST** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32* [[TMP]], i32** [[_TMP2]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 @@ -1381,41 +1418,41 @@ // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: store i32* [[A3]], i32** [[_TMP4]], align 8 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP2]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[_TMP4]], align 8 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP7]], align 4 // CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP8]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP9]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: @@ -1427,11 +1464,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SST* noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK1-NEXT: [[A:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1445,94 +1482,96 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], %struct.SST* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SST*, %struct.SST** [[TMP1]], align 8 +// CHECK1-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], %struct.SST* [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store i32* [[A1]], i32** [[A]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A]], align 8 -// CHECK1-NEXT: store i32* [[TMP1]], i32** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[A]], align 8 +// CHECK1-NEXT: store i32* [[TMP3]], i32** [[TMP]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP]], align 8 // CHECK1-NEXT: store i32* [[A3]], i32** [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP13]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 -// CHECK1-NEXT: store i32 [[TMP17]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[TMP4]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK1-SAME: (%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.4*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.4* [[THIS]], %struct.S.4** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.4*, %struct.S.4** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_4:%.*]], %struct.S.4* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK1-SAME: (%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.4*, align 8 +// CHECK1-NEXT: store %struct.S.4* [[THIS]], %struct.S.4** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.4*, %struct.S.4** [[THIS_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -1568,6 +1607,7 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 @@ -1592,7 +1632,9 @@ // CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 3 // CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[D_ADDR]], align 8 // CHECK3-NEXT: store i32* [[TMP1]], i32** [[C]], align 8 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[THIS1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[THIS1]], %struct.SS** [[TMP2]], align 8 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: store i32* [[TMP]], i32** [[_TMP2]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 @@ -1600,43 +1642,43 @@ // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: store i32* [[A3]], i32** [[_TMP4]], align 8 // CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[_TMP4]], align 8 -// CHECK3-NEXT: store i32 [[ADD]], i32* [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store %struct.SS* [[THIS1]], %struct.SS** [[TMP9]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[_TMP4]], align 8 -// CHECK3-NEXT: store i32* [[TMP11]], i32** [[TMP10]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[_TMP4]], align 8 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[THIS1]], %struct.SS** [[TMP10]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[_TMP4]], align 8 +// CHECK3-NEXT: store i32* [[TMP12]], i32** [[TMP11]], align 8 // CHECK3-NEXT: call void @_ZZN2SSC1ERiENKUlvE0_clEv(%class.anon.1* noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: @@ -1648,11 +1690,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK3-NEXT: [[E:%.*]] = alloca [4 x i8]*, align 8 // CHECK3-NEXT: [[A:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -1677,112 +1719,114 @@ // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[E1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 +// CHECK3-NEXT: [[E1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 1 // CHECK3-NEXT: store [4 x i8]* [[E1]], [4 x i8]** [[E]], align 8 -// CHECK3-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store i32* [[A2]], i32** [[A]], align 8 -// CHECK3-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP0]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[C3]], align 8 -// CHECK3-NEXT: store i32* [[TMP1]], i32** [[C]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A]], align 8 -// CHECK3-NEXT: store i32* [[TMP2]], i32** [[TMP]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[C]], align 8 -// CHECK3-NEXT: store i32* [[TMP3]], i32** [[_TMP4]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load [4 x i8]*, [4 x i8]** [[E]], align 8 -// CHECK3-NEXT: store [4 x i8]* [[TMP4]], [4 x i8]** [[_TMP5]], align 8 +// CHECK3-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP2]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[C3]], align 8 +// CHECK3-NEXT: store i32* [[TMP3]], i32** [[C]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A]], align 8 +// CHECK3-NEXT: store i32* [[TMP4]], i32** [[TMP]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[C]], align 8 +// CHECK3-NEXT: store i32* [[TMP5]], i32** [[_TMP4]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = load [4 x i8]*, [4 x i8]** [[E]], align 8 +// CHECK3-NEXT: store [4 x i8]* [[TMP6]], [4 x i8]** [[_TMP5]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load [4 x i8]*, [4 x i8]** [[_TMP5]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = bitcast [4 x i8]* [[E7]] to i8* -// CHECK3-NEXT: [[TMP7:%.*]] = bitcast [4 x i8]* [[TMP5]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[TMP6]], i8* align 1 [[TMP7]], i64 4, i1 false) +// CHECK3-NEXT: [[TMP7:%.*]] = load [4 x i8]*, [4 x i8]** [[_TMP5]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = bitcast [4 x i8]* [[E7]] to i8* +// CHECK3-NEXT: [[TMP9:%.*]] = bitcast [4 x i8]* [[TMP7]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[TMP8]], i8* align 1 [[TMP9]], i64 4, i1 false) // CHECK3-NEXT: store [4 x i8]* [[E7]], [4 x i8]** [[_TMP8]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]]) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP]], align 8 // CHECK3-NEXT: store i32* [[A9]], i32** [[_TMP10]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[_TMP4]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP4]], align 8 // CHECK3-NEXT: store i32* [[C12]], i32** [[_TMP13]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = load [4 x i8]*, [4 x i8]** [[_TMP5]], align 8 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load [4 x i8]*, [4 x i8]** [[_TMP5]], align 8 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP14:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP14:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP19]], align 8 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32*, i32** [[_TMP10]], align 8 -// CHECK3-NEXT: store i32* [[TMP21]], i32** [[TMP20]], align 8 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store i32* [[B11]], i32** [[TMP22]], align 8 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32*, i32** [[_TMP13]], align 8 -// CHECK3-NEXT: store i32* [[TMP24]], i32** [[TMP23]], align 8 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP21]], align 8 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32*, i32** [[_TMP10]], align 8 +// CHECK3-NEXT: store i32* [[TMP23]], i32** [[TMP22]], align 8 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[B11]], i32** [[TMP24]], align 8 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32*, i32** [[_TMP13]], align 8 +// CHECK3-NEXT: store i32* [[TMP26]], i32** [[TMP25]], align 8 // CHECK3-NEXT: call void @_ZZN2SSC1ERiENKUlvE_clEv(%class.anon.0* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK3-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]]) -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK3-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP28:%.*]] = load i32*, i32** [[_TMP10]], align 8 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 -// CHECK3-NEXT: store i32 [[TMP29]], i32* [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[B11]], align 4 -// CHECK3-NEXT: store i32 [[TMP30]], i32* [[B]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32*, i32** [[_TMP13]], align 8 -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 -// CHECK3-NEXT: store i32 [[TMP32]], i32* [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = load [4 x i8]*, [4 x i8]** [[_TMP8]], align 8 -// CHECK3-NEXT: [[TMP34:%.*]] = load [4 x i8], [4 x i8]* [[TMP33]], align 1 -// CHECK3-NEXT: store [4 x i8] [[TMP34]], [4 x i8]* [[TMP12]], align 1 -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[B11]], align 4 -// CHECK3-NEXT: [[B16:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP36:%.*]] = trunc i32 [[TMP35]] to i8 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32*, i32** [[_TMP10]], align 8 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 +// CHECK3-NEXT: store i32 [[TMP31]], i32* [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[B11]], align 4 +// CHECK3-NEXT: store i32 [[TMP32]], i32* [[B]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32*, i32** [[_TMP13]], align 8 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK3-NEXT: store i32 [[TMP34]], i32* [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load [4 x i8]*, [4 x i8]** [[_TMP8]], align 8 +// CHECK3-NEXT: [[TMP36:%.*]] = load [4 x i8], [4 x i8]* [[TMP35]], align 1 +// CHECK3-NEXT: store [4 x i8] [[TMP36]], [4 x i8]* [[TMP14]], align 1 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[B11]], align 4 +// CHECK3-NEXT: [[B16:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP2]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP38:%.*]] = trunc i32 [[TMP37]] to i8 // CHECK3-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[B16]], align 8 -// CHECK3-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP36]], 15 +// CHECK3-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP38]], 15 // CHECK3-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -16 // CHECK3-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], [[BF_VALUE]] // CHECK3-NEXT: store i8 [[BF_SET]], i8* [[B16]], align 8 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]]) +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) // CHECK3-NEXT: ret void // // @@ -1790,6 +1834,7 @@ // CHECK3-SAME: (%class.anon.0* noundef nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR2:[0-9]+]] align 2 { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon.0*, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK3-NEXT: store %class.anon.0* [[THIS]], %class.anon.0** [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[THIS1:%.*]] = load %class.anon.0*, %class.anon.0** [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], %class.anon.0* [[THIS1]], i32 0, i32 0 @@ -1809,13 +1854,21 @@ // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[DIV]], i32* [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP11]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP1]], %struct.SS** [[TMP11]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 1 // CHECK3-NEXT: [[TMP14:%.*]] = load i32*, i32** [[TMP13]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[TMP15]], align 8 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.SS* [[TMP1]], i32* [[TMP12]], i32* [[TMP14]], i32* [[TMP16]]) +// CHECK3-NEXT: store i32* [[TMP14]], i32** [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[TMP16]], align 8 +// CHECK3-NEXT: store i32* [[TMP17]], i32** [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP19]], align 8 +// CHECK3-NEXT: store i32* [[TMP20]], i32** [[TMP18]], align 8 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // @@ -1823,6 +1876,7 @@ // CHECK3-SAME: (%class.anon.1* noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR2]] align 2 { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon.1*, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK3-NEXT: store %class.anon.1* [[THIS]], %class.anon.1** [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[THIS1:%.*]] = load %class.anon.1*, %class.anon.1** [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1:%.*]], %class.anon.1* [[THIS1]], i32 0, i32 0 @@ -1852,21 +1906,22 @@ // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 2 // CHECK3-NEXT: store i32 [[MUL]], i32* [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[TMP8]], align 8 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.SS* [[TMP1]], i32* [[TMP9]]) +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP1]], %struct.SS** [[TMP8]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[TMP10]], align 8 +// CHECK3-NEXT: store i32* [[TMP11]], i32** [[TMP9]], align 8 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 @@ -1877,111 +1932,112 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A5:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP5:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP6:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[B7:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C8:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP9:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK3-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK3-NEXT: store i32* [[B]], i32** [[B_ADDR]], align 8 -// CHECK3-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[B_ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[C_ADDR]], align 8 -// CHECK3-NEXT: store i32* [[TMP1]], i32** [[TMP]], align 8 -// CHECK3-NEXT: store i32* [[TMP3]], i32** [[_TMP1]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK3-NEXT: store i32* [[TMP4]], i32** [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[_TMP1]], align 8 -// CHECK3-NEXT: store i32* [[TMP5]], i32** [[_TMP3]], align 8 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 +// CHECK3-NEXT: store i32* [[TMP4]], i32** [[TMP]], align 8 +// CHECK3-NEXT: store i32* [[TMP8]], i32** [[_TMP1]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK3-NEXT: store i32* [[TMP9]], i32** [[_TMP2]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP1]], align 8 +// CHECK3-NEXT: store i32* [[TMP10]], i32** [[_TMP3]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[_TMP2]], align 8 -// CHECK3-NEXT: store i32* [[A5]], i32** [[_TMP6]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK3-NEXT: store i32* [[C8]], i32** [[_TMP9]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK3-NEXT: store i32* [[A]], i32** [[_TMP5]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK3-NEXT: store i32* [[C]], i32** [[_TMP6]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP14]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[_TMP6]], align 8 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK3-NEXT: store i32 [[INC]], i32* [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[B7]], align 4 -// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP18]], -1 -// CHECK3-NEXT: store i32 [[DEC]], i32* [[B7]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32*, i32** [[_TMP9]], align 8 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP20]], 1 -// CHECK3-NEXT: store i32 [[DIV]], i32* [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32*, i32** [[_TMP5]], align 8 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK3-NEXT: store i32 [[INC]], i32* [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[B]], align 4 +// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP23]], -1 +// CHECK3-NEXT: store i32 [[DEC]], i32* [[B]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32*, i32** [[_TMP6]], align 8 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP25]], 1 +// CHECK3-NEXT: store i32 [[DIV]], i32* [[TMP24]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK3-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]]) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP14]]) +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK3-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32*, i32** [[_TMP6]], align 8 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK3-NEXT: store i32 [[TMP25]], i32* [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[B7]], align 4 -// CHECK3-NEXT: store i32 [[TMP26]], i32* [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32*, i32** [[_TMP9]], align 8 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK3-NEXT: store i32 [[TMP28]], i32* [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32*, i32** [[_TMP5]], align 8 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK3-NEXT: store i32 [[TMP30]], i32* [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[B]], align 4 +// CHECK3-NEXT: store i32 [[TMP31]], i32* [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32*, i32** [[_TMP6]], align 8 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK3-NEXT: store i32 [[TMP33]], i32* [[TMP12]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]]) +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 @@ -1995,95 +2051,97 @@ // CHECK3-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK3-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK3-NEXT: store i32* [[TMP1]], i32** [[TMP]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK3-NEXT: store i32* [[TMP2]], i32** [[_TMP1]], align 8 +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK3-NEXT: store i32* [[TMP4]], i32** [[TMP]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK3-NEXT: store i32* [[TMP5]], i32** [[_TMP1]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[B3]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[_TMP1]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[INC]], i32* [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[B3]], align 4 -// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP13]], -1 -// CHECK3-NEXT: store i32 [[DEC]], i32* [[B3]], align 4 -// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32*, i32** [[C]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32*, i32** [[_TMP1]], align 8 // CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP15]], 1 -// CHECK3-NEXT: store i32 [[DIV]], i32* [[TMP14]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: store i32 [[INC]], i32* [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[B3]], align 4 +// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP16]], -1 +// CHECK3-NEXT: store i32 [[DEC]], i32* [[B3]], align 4 +// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[C]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP18]], 1 +// CHECK3-NEXT: store i32 [[DIV]], i32* [[TMP17]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: // CHECK3-NEXT: store i32 2, i32* [[B3]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[B3]], align 4 -// CHECK3-NEXT: store i32 [[TMP19]], i32* [[B]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[B3]], align 4 -// CHECK3-NEXT: [[B7:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i8 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[B3]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[B]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[B3]], align 4 +// CHECK3-NEXT: [[B7:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP2]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP24:%.*]] = trunc i32 [[TMP23]] to i8 // CHECK3-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[B7]], align 8 -// CHECK3-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP21]], 15 +// CHECK3-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP24]], 15 // CHECK3-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -16 // CHECK3-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], [[BF_VALUE]] // CHECK3-NEXT: store i8 [[BF_SET]], i8* [[B7]], align 8 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2094,86 +2152,88 @@ // CHECK3-NEXT: [[G:%.*]] = alloca i32, align 128 // CHECK3-NEXT: [[G1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[SIVAR3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_2:%.*]], align 8 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_5:%.*]], align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** @g1, align 8 -// CHECK3-NEXT: store i32* [[TMP1]], i32** [[TMP]], align 8 +// CHECK3-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** @g1, align 8 +// CHECK3-NEXT: store i32* [[TMP3]], i32** [[TMP]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** @g1, align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** @g1, align 8 // CHECK3-NEXT: store i32* [[G1]], i32** [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK3-NEXT: store i32 1, i32* [[G]], align 128 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[_TMP2]], align 8 -// CHECK3-NEXT: store volatile i32 1, i32* [[TMP11]], align 4 -// CHECK3-NEXT: store i32 2, i32* [[SIVAR3]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store i32* [[G]], i32** [[TMP12]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32*, i32** [[_TMP2]], align 8 -// CHECK3-NEXT: store i32* [[TMP14]], i32** [[TMP13]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store i32* [[SIVAR3]], i32** [[TMP15]], align 8 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.2* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK3-NEXT: store volatile i32 1, i32* [[TMP13]], align 4 +// CHECK3-NEXT: store i32 2, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[G]], i32** [[TMP14]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK3-NEXT: store i32* [[TMP16]], i32** [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[TMP17]], align 8 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.5* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[G]], align 128 -// CHECK3-NEXT: store volatile i32 [[TMP19]], i32* @g, align 128 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32*, i32** [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK3-NEXT: store volatile i32 [[TMP21]], i32* [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[SIVAR3]], align 4 -// CHECK3-NEXT: store i32 [[TMP22]], i32* [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[G]], align 128 +// CHECK3-NEXT: store volatile i32 [[TMP21]], i32* @g, align 128 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK3-NEXT: store volatile i32 [[TMP23]], i32* [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP24]], i32* [[TMP2]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) // CHECK3-NEXT: ret void // // @@ -2226,19 +2286,22 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>*, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK4-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* // CHECK4-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>** [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* @_ZZ4mainE5sivar) +// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store i32* @_ZZ4mainE5sivar, i32** [[TMP0]], align 8 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2249,55 +2312,57 @@ // CHECK4-NEXT: [[G:%.*]] = alloca i32, align 128 // CHECK4-NEXT: [[G1:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[SIVAR3:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, [84 x i8], i32 }>, align 128 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** @g1, align 8 -// CHECK4-NEXT: store i32* [[TMP1]], i32** [[TMP]], align 8 +// CHECK4-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32*, i32** @g1, align 8 +// CHECK4-NEXT: store i32* [[TMP3]], i32** [[TMP]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** @g1, align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32*, i32** @g1, align 8 // CHECK4-NEXT: store i32* [[G1]], i32** [[_TMP2]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK4-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK4: omp.inner.for.body: // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4: omp.inner.for.body: +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK4-NEXT: store i32 1, i32* [[G]], align 128 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[_TMP2]], align 8 -// CHECK4-NEXT: store volatile i32 1, i32* [[TMP11]], align 4 -// CHECK4-NEXT: store i32 2, i32* [[SIVAR3]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK4-NEXT: store volatile i32 1, i32* [[TMP13]], align 4 +// CHECK4-NEXT: store i32 2, i32* [[SIVAR]], align 4 // CHECK4-NEXT: store i32 1, i32* [[G]], align 128 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32*, i32** [[_TMP2]], align 8 -// CHECK4-NEXT: store volatile i32 1, i32* [[TMP12]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK4-NEXT: store volatile i32 1, i32* [[TMP14]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, [84 x i8], i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, [84 x i8], i32 }>* [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** [[BLOCK_ISA]], align 128 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, [84 x i8], i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, [84 x i8], i32 }>* [[BLOCK]], i32 0, i32 1 @@ -2309,47 +2374,47 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, [84 x i8], i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, [84 x i8], i32 }>* [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, [84 x i8], i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, [84 x i8], i32 }>* [[BLOCK]], i32 0, i32 8 -// CHECK4-NEXT: [[TMP13:%.*]] = load volatile i32, i32* [[G]], align 128 -// CHECK4-NEXT: store volatile i32 [[TMP13]], i32* [[BLOCK_CAPTURED]], align 128 -// CHECK4-NEXT: [[BLOCK_CAPTURED5:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, [84 x i8], i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, [84 x i8], i32 }>* [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32*, i32** [[_TMP2]], align 8 -// CHECK4-NEXT: store i32* [[TMP14]], i32** [[BLOCK_CAPTURED5]], align 32 -// CHECK4-NEXT: [[BLOCK_CAPTURED6:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, [84 x i8], i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, [84 x i8], i32 }>* [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[SIVAR3]], align 4 -// CHECK4-NEXT: store i32 [[TMP15]], i32* [[BLOCK_CAPTURED6]], align 8 -// CHECK4-NEXT: [[TMP16:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, [84 x i8], i32 }>* [[BLOCK]] to void ()* -// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP16]] to %struct.__block_literal_generic* -// CHECK4-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP18:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* -// CHECK4-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP17]], align 8 -// CHECK4-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to void (i8*)* -// CHECK4-NEXT: call void [[TMP20]](i8* noundef [[TMP18]]) +// CHECK4-NEXT: [[TMP15:%.*]] = load volatile i32, i32* [[G]], align 128 +// CHECK4-NEXT: store volatile i32 [[TMP15]], i32* [[BLOCK_CAPTURED]], align 128 +// CHECK4-NEXT: [[BLOCK_CAPTURED4:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, [84 x i8], i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, [84 x i8], i32 }>* [[BLOCK]], i32 0, i32 5 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK4-NEXT: store i32* [[TMP16]], i32** [[BLOCK_CAPTURED4]], align 32 +// CHECK4-NEXT: [[BLOCK_CAPTURED5:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, [84 x i8], i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, [84 x i8], i32 }>* [[BLOCK]], i32 0, i32 6 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP17]], i32* [[BLOCK_CAPTURED5]], align 8 +// CHECK4-NEXT: [[TMP18:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32, [84 x i8], i32 }>* [[BLOCK]] to void ()* +// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP18]] to %struct.__block_literal_generic* +// CHECK4-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP20:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* +// CHECK4-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP19]], align 8 +// CHECK4-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP21]] to void (i8*)* +// CHECK4-NEXT: call void [[TMP22]](i8* noundef [[TMP20]]) // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK4-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK4-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK4-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK4-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK4: .omp.lastprivate.then: -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[G]], align 128 -// CHECK4-NEXT: store volatile i32 [[TMP24]], i32* @g, align 128 -// CHECK4-NEXT: [[TMP25:%.*]] = load i32*, i32** [[_TMP2]], align 8 -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK4-NEXT: store volatile i32 [[TMP26]], i32* [[TMP2]], align 4 -// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[SIVAR3]], align 4 -// CHECK4-NEXT: store i32 [[TMP27]], i32* [[TMP0]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[G]], align 128 +// CHECK4-NEXT: store volatile i32 [[TMP26]], i32* @g, align 128 +// CHECK4-NEXT: [[TMP27:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 +// CHECK4-NEXT: store volatile i32 [[TMP28]], i32* [[TMP4]], align 4 +// CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP29]], i32* [[TMP2]], align 4 // CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK4: .omp.lastprivate.done: -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]]) +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]]) // CHECK4-NEXT: ret void // // @@ -2376,6 +2441,7 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 // CHECK4-NEXT: [[D_ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 @@ -2400,7 +2466,9 @@ // CHECK4-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 3 // CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[D_ADDR]], align 8 // CHECK4-NEXT: store i32* [[TMP1]], i32** [[C]], align 8 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.SS* [[THIS1]]) +// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store %struct.SS* [[THIS1]], %struct.SS** [[TMP2]], align 8 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: store i32* [[TMP]], i32** [[_TMP2]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 @@ -2408,31 +2476,31 @@ // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK4-NEXT: store i32* [[A3]], i32** [[_TMP4]], align 8 // CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: [[TMP8:%.*]] = load i32*, i32** [[_TMP4]], align 8 -// CHECK4-NEXT: store i32 [[ADD]], i32* [[TMP8]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32*, i32** [[_TMP4]], align 8 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[TMP9]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32* }>* [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** [[BLOCK_ISA]], align 8 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32* }>* [[BLOCK]], i32 0, i32 1 @@ -2446,21 +2514,21 @@ // CHECK4-NEXT: [[BLOCK_CAPTURED_THIS_ADDR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32* }>* [[BLOCK]], i32 0, i32 5 // CHECK4-NEXT: store %struct.SS* [[THIS1]], %struct.SS** [[BLOCK_CAPTURED_THIS_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32* }>* [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32*, i32** [[_TMP4]], align 8 -// CHECK4-NEXT: store i32* [[TMP9]], i32** [[BLOCK_CAPTURED]], align 8 -// CHECK4-NEXT: [[TMP10:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32* }>* [[BLOCK]] to void ()* -// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP10]] to %struct.__block_literal_generic* -// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP12:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* -// CHECK4-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP11]], align 8 -// CHECK4-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to void (i8*)* -// CHECK4-NEXT: call void [[TMP14]](i8* noundef [[TMP12]]) +// CHECK4-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP4]], align 8 +// CHECK4-NEXT: store i32* [[TMP10]], i32** [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32* }>* [[BLOCK]] to void ()* +// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP11]] to %struct.__block_literal_generic* +// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP13:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* +// CHECK4-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP12]], align 8 +// CHECK4-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to void (i8*)* +// CHECK4-NEXT: call void [[TMP15]](i8* noundef [[TMP13]]) // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK4-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: @@ -2472,11 +2540,11 @@ // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR2]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK4-NEXT: [[E:%.*]] = alloca [4 x i8]*, align 8 // CHECK4-NEXT: [[A:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -2501,61 +2569,63 @@ // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, align 8 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[E1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 +// CHECK4-NEXT: [[E1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 1 // CHECK4-NEXT: store [4 x i8]* [[E1]], [4 x i8]** [[E]], align 8 -// CHECK4-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP2]], i32 0, i32 0 // CHECK4-NEXT: store i32* [[A2]], i32** [[A]], align 8 -// CHECK4-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP0]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[C3]], align 8 -// CHECK4-NEXT: store i32* [[TMP1]], i32** [[C]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A]], align 8 -// CHECK4-NEXT: store i32* [[TMP2]], i32** [[TMP]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32*, i32** [[C]], align 8 -// CHECK4-NEXT: store i32* [[TMP3]], i32** [[_TMP4]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load [4 x i8]*, [4 x i8]** [[E]], align 8 -// CHECK4-NEXT: store [4 x i8]* [[TMP4]], [4 x i8]** [[_TMP5]], align 8 +// CHECK4-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP2]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32*, i32** [[C3]], align 8 +// CHECK4-NEXT: store i32* [[TMP3]], i32** [[C]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A]], align 8 +// CHECK4-NEXT: store i32* [[TMP4]], i32** [[TMP]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[C]], align 8 +// CHECK4-NEXT: store i32* [[TMP5]], i32** [[_TMP4]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = load [4 x i8]*, [4 x i8]** [[E]], align 8 +// CHECK4-NEXT: store [4 x i8]* [[TMP6]], [4 x i8]** [[_TMP5]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load [4 x i8]*, [4 x i8]** [[_TMP5]], align 8 -// CHECK4-NEXT: [[TMP6:%.*]] = bitcast [4 x i8]* [[E7]] to i8* -// CHECK4-NEXT: [[TMP7:%.*]] = bitcast [4 x i8]* [[TMP5]] to i8* -// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[TMP6]], i8* align 1 [[TMP7]], i64 4, i1 false) +// CHECK4-NEXT: [[TMP7:%.*]] = load [4 x i8]*, [4 x i8]** [[_TMP5]], align 8 +// CHECK4-NEXT: [[TMP8:%.*]] = bitcast [4 x i8]* [[E7]] to i8* +// CHECK4-NEXT: [[TMP9:%.*]] = bitcast [4 x i8]* [[TMP7]] to i8* +// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[TMP8]], i8* align 1 [[TMP9]], i64 4, i1 false) // CHECK4-NEXT: store [4 x i8]* [[E7]], [4 x i8]** [[_TMP8]], align 8 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]]) -// CHECK4-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]]) +// CHECK4-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP]], align 8 // CHECK4-NEXT: store i32* [[A9]], i32** [[_TMP10]], align 8 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[_TMP4]], align 8 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP4]], align 8 // CHECK4-NEXT: store i32* [[C12]], i32** [[_TMP13]], align 8 -// CHECK4-NEXT: [[TMP12:%.*]] = load [4 x i8]*, [4 x i8]** [[_TMP5]], align 8 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 +// CHECK4-NEXT: [[TMP14:%.*]] = load [4 x i8]*, [4 x i8]** [[_TMP5]], align 8 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP14:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP14:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK4-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 0 @@ -2569,61 +2639,61 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp.4 to %struct.__block_descriptor*), %struct.__block_descriptor** [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED_THIS_ADDR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[BLOCK_CAPTURED_THIS_ADDR]], align 8 +// CHECK4-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[BLOCK_CAPTURED_THIS_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32*, i32** [[_TMP10]], align 8 -// CHECK4-NEXT: store i32* [[TMP19]], i32** [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32*, i32** [[_TMP10]], align 8 +// CHECK4-NEXT: store i32* [[TMP21]], i32** [[BLOCK_CAPTURED]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED15:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 8 -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[B11]], align 4 -// CHECK4-NEXT: store i32 [[TMP20]], i32* [[BLOCK_CAPTURED15]], align 8 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[B11]], align 4 +// CHECK4-NEXT: store i32 [[TMP22]], i32* [[BLOCK_CAPTURED15]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED16:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32*, i32** [[_TMP13]], align 8 -// CHECK4-NEXT: store i32* [[TMP21]], i32** [[BLOCK_CAPTURED16]], align 8 -// CHECK4-NEXT: [[TMP22:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]] to void ()* -// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP22]] to %struct.__block_literal_generic* -// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP24:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* -// CHECK4-NEXT: [[TMP25:%.*]] = load i8*, i8** [[TMP23]], align 8 -// CHECK4-NEXT: [[TMP26:%.*]] = bitcast i8* [[TMP25]] to void (i8*)* -// CHECK4-NEXT: call void [[TMP26]](i8* noundef [[TMP24]]) +// CHECK4-NEXT: [[TMP23:%.*]] = load i32*, i32** [[_TMP13]], align 8 +// CHECK4-NEXT: store i32* [[TMP23]], i32** [[BLOCK_CAPTURED16]], align 8 +// CHECK4-NEXT: [[TMP24:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]] to void ()* +// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP24]] to %struct.__block_literal_generic* +// CHECK4-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP26:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* +// CHECK4-NEXT: [[TMP27:%.*]] = load i8*, i8** [[TMP25]], align 8 +// CHECK4-NEXT: [[TMP28:%.*]] = bitcast i8* [[TMP27]] to void (i8*)* +// CHECK4-NEXT: call void [[TMP28]](i8* noundef [[TMP26]]) // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK4-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]]) -// CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK4-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) +// CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK4-NEXT: br i1 [[TMP31]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK4: .omp.lastprivate.then: -// CHECK4-NEXT: [[TMP30:%.*]] = load i32*, i32** [[_TMP10]], align 8 -// CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 -// CHECK4-NEXT: store i32 [[TMP31]], i32* [[TMP10]], align 4 -// CHECK4-NEXT: [[TMP32:%.*]] = load i32, i32* [[B11]], align 4 -// CHECK4-NEXT: store i32 [[TMP32]], i32* [[B]], align 4 -// CHECK4-NEXT: [[TMP33:%.*]] = load i32*, i32** [[_TMP13]], align 8 -// CHECK4-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK4-NEXT: store i32 [[TMP34]], i32* [[TMP11]], align 4 -// CHECK4-NEXT: [[TMP35:%.*]] = load [4 x i8]*, [4 x i8]** [[_TMP8]], align 8 -// CHECK4-NEXT: [[TMP36:%.*]] = load [4 x i8], [4 x i8]* [[TMP35]], align 1 -// CHECK4-NEXT: store [4 x i8] [[TMP36]], [4 x i8]* [[TMP12]], align 1 -// CHECK4-NEXT: [[TMP37:%.*]] = load i32, i32* [[B11]], align 4 -// CHECK4-NEXT: [[B18:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP0]], i32 0, i32 2 -// CHECK4-NEXT: [[TMP38:%.*]] = trunc i32 [[TMP37]] to i8 +// CHECK4-NEXT: [[TMP32:%.*]] = load i32*, i32** [[_TMP10]], align 8 +// CHECK4-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK4-NEXT: store i32 [[TMP33]], i32* [[TMP12]], align 4 +// CHECK4-NEXT: [[TMP34:%.*]] = load i32, i32* [[B11]], align 4 +// CHECK4-NEXT: store i32 [[TMP34]], i32* [[B]], align 4 +// CHECK4-NEXT: [[TMP35:%.*]] = load i32*, i32** [[_TMP13]], align 8 +// CHECK4-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK4-NEXT: store i32 [[TMP36]], i32* [[TMP13]], align 4 +// CHECK4-NEXT: [[TMP37:%.*]] = load [4 x i8]*, [4 x i8]** [[_TMP8]], align 8 +// CHECK4-NEXT: [[TMP38:%.*]] = load [4 x i8], [4 x i8]* [[TMP37]], align 1 +// CHECK4-NEXT: store [4 x i8] [[TMP38]], [4 x i8]* [[TMP14]], align 1 +// CHECK4-NEXT: [[TMP39:%.*]] = load i32, i32* [[B11]], align 4 +// CHECK4-NEXT: [[B18:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP2]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP40:%.*]] = trunc i32 [[TMP39]] to i8 // CHECK4-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[B18]], align 8 -// CHECK4-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP38]], 15 +// CHECK4-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP40]], 15 // CHECK4-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -16 // CHECK4-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], [[BF_VALUE]] // CHECK4-NEXT: store i8 [[BF_SET]], i8* [[B18]], align 8 // CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK4: .omp.lastprivate.done: -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]]) +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]]) // CHECK4-NEXT: ret void // // @@ -2632,6 +2702,7 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>*, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK4-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* // CHECK4-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>** [[BLOCK_ADDR]], align 8 @@ -2651,24 +2722,29 @@ // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP4]], 1 // CHECK4-NEXT: store i32 [[DIV]], i32* [[TMP3]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[BLOCK_CAPTURE_ADDR3]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32*, i32** [[BLOCK_CAPTURE_ADDR3]], align 8 +// CHECK4-NEXT: store i32* [[TMP7]], i32** [[TMP6]], align 8 +// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR4:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 8 +// CHECK4-NEXT: store i32* [[BLOCK_CAPTURE_ADDR4]], i32** [[TMP8]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR5:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32*, i32** [[BLOCK_CAPTURE_ADDR5]], align 8 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.SS* [[THIS]], i32* [[TMP5]], i32* [[BLOCK_CAPTURE_ADDR4]], i32* [[TMP6]]) +// CHECK4-NEXT: [[TMP10:%.*]] = load i32*, i32** [[BLOCK_CAPTURE_ADDR5]], align 8 +// CHECK4-NEXT: store i32* [[TMP10]], i32** [[TMP9]], align 8 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 @@ -2679,101 +2755,103 @@ // CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[A5:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[_TMP5:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[_TMP6:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[B7:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[C8:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[_TMP9:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK4-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK4-NEXT: store i32* [[B]], i32** [[B_ADDR]], align 8 -// CHECK4-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[B_ADDR]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32*, i32** [[C_ADDR]], align 8 -// CHECK4-NEXT: store i32* [[TMP1]], i32** [[TMP]], align 8 -// CHECK4-NEXT: store i32* [[TMP3]], i32** [[_TMP1]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK4-NEXT: store i32* [[TMP4]], i32** [[_TMP2]], align 8 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[_TMP1]], align 8 -// CHECK4-NEXT: store i32* [[TMP5]], i32** [[_TMP3]], align 8 +// CHECK4-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 +// CHECK4-NEXT: store i32* [[TMP4]], i32** [[TMP]], align 8 +// CHECK4-NEXT: store i32* [[TMP8]], i32** [[_TMP1]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK4-NEXT: store i32* [[TMP9]], i32** [[_TMP2]], align 8 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP1]], align 8 +// CHECK4-NEXT: store i32* [[TMP10]], i32** [[_TMP3]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32*, i32** [[_TMP2]], align 8 -// CHECK4-NEXT: store i32* [[A5]], i32** [[_TMP6]], align 8 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK4-NEXT: store i32* [[C8]], i32** [[_TMP9]], align 8 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK4-NEXT: store i32* [[A]], i32** [[_TMP5]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK4-NEXT: store i32* [[C]], i32** [[_TMP6]], align 8 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK4-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK4-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32*, i32** [[_TMP6]], align 8 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK4-NEXT: store i32 [[INC]], i32* [[TMP16]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[B7]], align 4 -// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP18]], -1 -// CHECK4-NEXT: store i32 [[DEC]], i32* [[B7]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32*, i32** [[_TMP9]], align 8 -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP20]], 1 -// CHECK4-NEXT: store i32 [[DIV]], i32* [[TMP19]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32*, i32** [[_TMP5]], align 8 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK4-NEXT: store i32 [[INC]], i32* [[TMP21]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[B]], align 4 +// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP23]], -1 +// CHECK4-NEXT: store i32 [[DEC]], i32* [[B]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32*, i32** [[_TMP6]], align 8 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP25]], 1 +// CHECK4-NEXT: store i32 [[DIV]], i32* [[TMP24]], align 4 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK4-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]]) -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK4-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]]) +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK4-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK4: .omp.lastprivate.then: -// CHECK4-NEXT: [[TMP24:%.*]] = load i32*, i32** [[_TMP6]], align 8 -// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK4-NEXT: store i32 [[TMP25]], i32* [[TMP6]], align 4 -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[B7]], align 4 -// CHECK4-NEXT: store i32 [[TMP26]], i32* [[TMP2]], align 4 -// CHECK4-NEXT: [[TMP27:%.*]] = load i32*, i32** [[_TMP9]], align 8 -// CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK4-NEXT: store i32 [[TMP28]], i32* [[TMP7]], align 4 +// CHECK4-NEXT: [[TMP29:%.*]] = load i32*, i32** [[_TMP5]], align 8 +// CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK4-NEXT: store i32 [[TMP30]], i32* [[TMP11]], align 4 +// CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[B]], align 4 +// CHECK4-NEXT: store i32 [[TMP31]], i32* [[TMP6]], align 4 +// CHECK4-NEXT: [[TMP32:%.*]] = load i32*, i32** [[_TMP6]], align 8 +// CHECK4-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK4-NEXT: store i32 [[TMP33]], i32* [[TMP12]], align 4 // CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK4: .omp.lastprivate.done: -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]]) +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP14]]) // CHECK4-NEXT: ret void // // @@ -2782,6 +2860,7 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32* }>*, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK4-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32* }>* // CHECK4-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32* }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32* }>** [[BLOCK_ADDR]], align 8 @@ -2812,19 +2891,22 @@ // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP4]], 1 // CHECK4-NEXT: store i32 [[DIV]], i32* [[TMP3]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32* }>* [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[BLOCK_CAPTURE_ADDR2]], align 8 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.SS* [[THIS]], i32* [[TMP5]]) +// CHECK4-NEXT: [[TMP7:%.*]] = load i32*, i32** [[BLOCK_CAPTURE_ADDR2]], align 8 +// CHECK4-NEXT: store i32* [[TMP7]], i32** [[TMP6]], align 8 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2838,83 +2920,85 @@ // CHECK4-NEXT: [[_TMP4:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK4-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK4-NEXT: store i32* [[TMP1]], i32** [[TMP]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK4-NEXT: store i32* [[TMP2]], i32** [[_TMP1]], align 8 +// CHECK4-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK4-NEXT: store i32* [[TMP4]], i32** [[TMP]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK4-NEXT: store i32* [[TMP5]], i32** [[_TMP1]], align 8 // CHECK4-NEXT: store i32* [[_TMP2]], i32** [[_TMP3]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK4-NEXT: store i32* [[C]], i32** [[_TMP4]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[_TMP4]], align 8 -// CHECK4-NEXT: store i32 [[ADD]], i32* [[TMP11]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32*, i32** [[_TMP1]], align 8 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK4-NEXT: store i32 [[INC]], i32* [[TMP12]], align 4 -// CHECK4-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32*, i32** [[_TMP4]], align 8 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[TMP14]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32*, i32** [[_TMP1]], align 8 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK4-NEXT: store i32 [[INC]], i32* [[TMP15]], align 4 +// CHECK4-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 2 // CHECK4-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[B]], align 8 // CHECK4-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 4 // CHECK4-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 4 // CHECK4-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32 // CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[BF_CAST]], -1 -// CHECK4-NEXT: [[TMP14:%.*]] = trunc i32 [[DEC]] to i8 +// CHECK4-NEXT: [[TMP17:%.*]] = trunc i32 [[DEC]] to i8 // CHECK4-NEXT: [[BF_LOAD6:%.*]] = load i8, i8* [[B]], align 8 -// CHECK4-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP14]], 15 +// CHECK4-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP17]], 15 // CHECK4-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD6]], -16 // CHECK4-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], [[BF_VALUE]] // CHECK4-NEXT: store i8 [[BF_SET]], i8* [[B]], align 8 // CHECK4-NEXT: [[BF_RESULT_SHL:%.*]] = shl i8 [[BF_VALUE]], 4 // CHECK4-NEXT: [[BF_RESULT_ASHR:%.*]] = ashr i8 [[BF_RESULT_SHL]], 4 // CHECK4-NEXT: [[BF_RESULT_CAST:%.*]] = sext i8 [[BF_RESULT_ASHR]] to i32 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32*, i32** [[_TMP4]], align 8 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 1 -// CHECK4-NEXT: store i32 [[DIV]], i32* [[TMP15]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32*, i32** [[_TMP4]], align 8 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP19]], 1 +// CHECK4-NEXT: store i32 [[DIV]], i32* [[TMP18]], align 4 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK4-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]]) // CHECK4-NEXT: ret void // // @@ -2928,6 +3012,10 @@ // CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_3:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK5-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK5-NEXT: call void @_ZN2SSC1ERi(%struct.SS* noundef nonnull align 8 dereferenceable(24) [[SS]], i32* noundef nonnull align 4 dereferenceable(4) @_ZZ4mainE5sivar) // CHECK5-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) @@ -2939,26 +3027,36 @@ // CHECK5-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYINIT_BEGIN]], i64 1 // CHECK5-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00) // CHECK5-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], float noundef 3.000000e+00) -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[T_VAR]], [2 x i32]* [[VEC]], [2 x %struct.S]* [[S_ARR]], %struct.S* [[VAR]], i32* @_ZZ4mainE5sivar) -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*)) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i32* [[T_VAR]], i32** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store %struct.S* [[VAR]], %struct.S** [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: store i32* @_ZZ4mainE5sivar, i32** [[TMP5]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]]) +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_2]]) +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_3]]) // CHECK5-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK5-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]] // CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done1: +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done4: // CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK5-NEXT: ret i32 [[TMP2]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK5-NEXT: ret i32 [[TMP7]] // // // CHECK5-LABEL: define {{[^@]+}}@_ZN2SSC1ERi @@ -2998,44 +3096,42 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK5-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK5-NEXT: [[SIVAR5:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK5-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: @@ -3045,105 +3141,105 @@ // CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: store i32 [[TMP13]], i32* [[ARRAYIDX]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK5-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i64 0, i64 [[IDXPROM7]] -// CHECK5-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX8]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[SIVAR5]], align 4 -// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[TMP16]] -// CHECK5-NEXT: store i32 [[ADD9]], i32* [[SIVAR5]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK5-NEXT: store i32 [[TMP19]], i32* [[ARRAYIDX]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK5-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM2]] +// CHECK5-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX3]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] +// CHECK5-NEXT: store i32 [[ADD4]], i32* [[SIVAR]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK5-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK5-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK5-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK5-NEXT: store i32 [[TMP23]], i32* [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP24:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK5-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP24]], i8* align 4 [[TMP25]], i64 8, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP2]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP26:%.*]] = bitcast [2 x %struct.S]* [[S_ARR3]] to %struct.S* -// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN11]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN11]], [[TMP27]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK5-NEXT: store i32 [[TMP29]], i32* [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP30:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK5-NEXT: [[TMP31:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i64 8, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP32:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK5-NEXT: [[TMP33:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN6]], [[TMP33]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP26]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[CALL12:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP32]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[CALL7:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done13: -// CHECK5-NEXT: [[CALL14:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP3]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK5-NEXT: [[TMP28:%.*]] = load i32, i32* [[SIVAR5]], align 4 -// CHECK5-NEXT: store i32 [[TMP28]], i32* [[TMP4]], align 4 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP33]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done8: +// CHECK5-NEXT: [[CALL9:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP8]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: [[TMP34:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK5-NEXT: store i32 [[TMP34]], i32* [[TMP10]], align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK5-NEXT: [[ARRAY_BEGIN15:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN15]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK5-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP35]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN15]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE16:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done16: -// CHECK5-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 -// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP31]]) +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done11: +// CHECK5-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP36]], align 4 +// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP37]]) // CHECK5-NEXT: ret void // // @@ -3158,10 +3254,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3173,74 +3270,77 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load float, float* @f, align 4 -// CHECK5-NEXT: store float [[TMP0]], float* [[F]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK5-NEXT: [[TMP1:%.*]] = load float, float* @f, align 4 +// CHECK5-NEXT: store float [[TMP1]], float* [[F]], align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load double, double* [[X]], align 8 -// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP9]], 1.000000e+00 +// CHECK5-NEXT: [[TMP10:%.*]] = load double, double* [[X]], align 8 +// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP10]], 1.000000e+00 // CHECK5-NEXT: store double [[INC]], double* [[X]], align 8 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK5-NEXT: br i1 [[TMP13]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP13:%.*]] = load double, double* [[X]], align 8 -// CHECK5-NEXT: store double [[TMP13]], double* @_ZN1A1xE, align 8 -// CHECK5-NEXT: [[TMP14:%.*]] = load float, float* [[F]], align 4 -// CHECK5-NEXT: store float [[TMP14]], float* @f, align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load double, double* [[X]], align 8 +// CHECK5-NEXT: store double [[TMP14]], double* @_ZN1A1xE, align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = load float, float* [[F]], align 4 +// CHECK5-NEXT: store float [[TMP15]], float* @f, align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3250,78 +3350,81 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: [[DOTF__VOID_ADDR:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP1]], i64 4, i8* inttoptr (i64 3 to i8*)) +// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK5-NEXT: [[DOTF__VOID_ADDR:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP2]], i64 4, i8* inttoptr (i64 3 to i8*)) // CHECK5-NEXT: [[DOTF__ADDR:%.*]] = bitcast i8* [[DOTF__VOID_ADDR]] to float* -// CHECK5-NEXT: [[TMP2:%.*]] = load float, float* @f, align 4 -// CHECK5-NEXT: store float [[TMP2]], float* [[DOTF__ADDR]], align 4 -// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load float, float* @f, align 4 +// CHECK5-NEXT: store float [[TMP3]], float* [[DOTF__ADDR]], align 4 +// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load double, double* @_ZN1A1xE, align 8 -// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP9]], 1.000000e+00 +// CHECK5-NEXT: [[TMP10:%.*]] = load double, double* @_ZN1A1xE, align 8 +// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP10]], 1.000000e+00 // CHECK5-NEXT: store double [[INC]], double* @_ZN1A1xE, align 8 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK5-NEXT: br i1 [[TMP13]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP13:%.*]] = load float, float* [[DOTF__ADDR]], align 4 -// CHECK5-NEXT: store float [[TMP13]], float* @f, align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load float, float* [[DOTF__ADDR]], align 4 +// CHECK5-NEXT: store float [[TMP14]], float* @f, align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: [[TMP14:%.*]] = bitcast float* [[DOTF__ADDR]] to i8* -// CHECK5-NEXT: call void @__kmpc_free(i32 [[TMP1]], i8* [[TMP14]], i8* inttoptr (i64 3 to i8*)) -// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK5-NEXT: [[TMP15:%.*]] = bitcast float* [[DOTF__ADDR]] to i8* +// CHECK5-NEXT: call void @__kmpc_free(i32 [[TMP2]], i8* [[TMP15]], i8* inttoptr (i64 3 to i8*)) +// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3332,88 +3435,90 @@ // CHECK5-NEXT: [[CNT:%.*]] = alloca i8, align 1 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: [[DOTCNT__VOID_ADDR:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP1]], i64 1, i8* inttoptr (i64 3 to i8*)) -// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], %struct.lasprivate.conditional* [[F]], i32 0, i32 1 -// CHECK5-NEXT: store i8 0, i8* [[TMP2]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], %struct.lasprivate.conditional* [[F]], i32 0, i32 0 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK5-NEXT: [[DOTCNT__VOID_ADDR:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP2]], i64 1, i8* inttoptr (i64 3 to i8*)) +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], %struct.lasprivate.conditional* [[F]], i32 0, i32 1 +// CHECK5-NEXT: store i8 0, i8* [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], %struct.lasprivate.conditional* [[F]], i32 0, i32 0 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i8 // CHECK5-NEXT: store i8 [[CONV]], i8* [[DOTCNT__VOID_ADDR]], align 1 -// CHECK5-NEXT: [[TMP10:%.*]] = load double, double* @_ZN1A1xE, align 8 -// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP10]], 1.000000e+00 +// CHECK5-NEXT: [[TMP11:%.*]] = load double, double* @_ZN1A1xE, align 8 +// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 // CHECK5-NEXT: store double [[INC]], double* @_ZN1A1xE, align 8 -// CHECK5-NEXT: store float 0.000000e+00, float* [[TMP3]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], [8 x i32]* @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* @.{{pl_cond[.].+[.|,]}} align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = icmp sle i32 [[TMP12]], [[TMP11]] -// CHECK5-NEXT: br i1 [[TMP13]], label [[LP_COND_THEN:%.*]], label [[LP_COND_EXIT:%.*]] +// CHECK5-NEXT: store float 0.000000e+00, float* [[TMP4]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* @.{{pl_cond[.].+[.|,]}} align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = icmp sle i32 [[TMP13]], [[TMP12]] +// CHECK5-NEXT: br i1 [[TMP14]], label [[LP_COND_THEN:%.*]], label [[LP_COND_EXIT:%.*]] // CHECK5: lp_cond_then: -// CHECK5-NEXT: store i32 [[TMP11]], i32* @.{{pl_cond[.].+[.|,]}} align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load float, float* [[TMP3]], align 4 -// CHECK5-NEXT: store float [[TMP14]], float* @{{pl_cond[.].+[.|,]}} align 4 +// CHECK5-NEXT: store i32 [[TMP12]], i32* @.{{pl_cond[.].+[.|,]}} align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load float, float* [[TMP4]], align 4 +// CHECK5-NEXT: store float [[TMP15]], float* @{{pl_cond[.].+[.|,]}} align 4 // CHECK5-NEXT: br label [[LP_COND_EXIT]] // CHECK5: lp_cond_exit: -// CHECK5-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], [8 x i32]* @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) +// CHECK5-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK5-NEXT: br i1 [[TMP18]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: // CHECK5-NEXT: store i8 2, i8* [[DOTCNT__VOID_ADDR]], align 1 -// CHECK5-NEXT: [[TMP18:%.*]] = load i8, i8* [[DOTCNT__VOID_ADDR]], align 1 -// CHECK5-NEXT: store i8 [[TMP18]], i8* @cnt, align 1 -// CHECK5-NEXT: [[TMP19:%.*]] = load float, float* @{{pl_cond[.].+[.|,]}} align 4 -// CHECK5-NEXT: store float [[TMP19]], float* [[TMP3]], align 4 -// CHECK5-NEXT: [[TMP20:%.*]] = load float, float* [[TMP3]], align 4 -// CHECK5-NEXT: store float [[TMP20]], float* @f, align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i8, i8* [[DOTCNT__VOID_ADDR]], align 1 +// CHECK5-NEXT: store i8 [[TMP19]], i8* @cnt, align 1 +// CHECK5-NEXT: [[TMP20:%.*]] = load float, float* @{{pl_cond[.].+[.|,]}} align 4 +// CHECK5-NEXT: store float [[TMP20]], float* [[TMP4]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = load float, float* [[TMP4]], align 4 +// CHECK5-NEXT: store float [[TMP21]], float* @f, align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @__kmpc_free(i32 [[TMP1]], i8* [[DOTCNT__VOID_ADDR]], i8* inttoptr (i64 3 to i8*)) -// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_free(i32 [[TMP2]], i8* [[DOTCNT__VOID_ADDR]], i8* inttoptr (i64 3 to i8*)) +// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // @@ -3421,38 +3526,47 @@ // CHECK5-SAME: () #[[ATTR7:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK5-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_4:%.*]], align 4 // CHECK5-NEXT: [[SST:%.*]] = alloca [[STRUCT_SST:%.*]], align 4 // CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 -// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK5-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 128 -// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.4], align 128 +// CHECK5-NEXT: [[VAR:%.*]] = alloca %struct.S.4*, align 128 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 +// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK5-NEXT: call void @_ZN3SSTIiEC1Ev(%struct.SST* noundef nonnull align 4 dereferenceable(4) [[SST]]) // CHECK5-NEXT: store i32 0, i32* [[T_VAR]], align 128 // CHECK5-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP0]], i8* align 128 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) -// CHECK5-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK5-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK5-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK5-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) -// CHECK5-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 128 -// CHECK5-NEXT: [[TMP1:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR]], align 128 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* [[T_VAR]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP1]]) +// CHECK5-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.4], [2 x %struct.S.4]* [[S_ARR]], i64 0, i64 0 +// CHECK5-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK5-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_4]], %struct.S.4* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK5-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK5-NEXT: store %struct.S.4* [[TEST]], %struct.S.4** [[VAR]], align 128 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i32* [[T_VAR]], i32** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store [2 x %struct.S.4]* [[S_ARR]], [2 x %struct.S.4]** [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP5:%.*]] = load %struct.S.4*, %struct.S.4** [[VAR]], align 128 +// CHECK5-NEXT: store %struct.S.4* [[TMP5]], %struct.S.4** [[TMP4]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.4], [2 x %struct.S.4]* [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_4]], %struct.S.4* [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.4* [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_4]], %struct.S.4* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.4* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK5: arraydestroy.done1: -// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK5-NEXT: ret i32 [[TMP3]] +// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK5-NEXT: ret i32 [[TMP7]] // // // CHECK5-LABEL: define {{[^@]+}}@_ZN2SSC2ERi @@ -3460,6 +3574,7 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 @@ -3483,7 +3598,9 @@ // CHECK5-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 3 // CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[D_ADDR]], align 8 // CHECK5-NEXT: store i32* [[TMP1]], i32** [[C]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.SS* [[THIS1]]) +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store %struct.SS* [[THIS1]], %struct.SS** [[TMP2]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: store i32* [[TMP]], i32** [[_TMP2]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 @@ -3491,44 +3608,44 @@ // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK5-NEXT: store i32* [[A3]], i32** [[_TMP4]], align 8 // CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32*, i32** [[_TMP4]], align 8 -// CHECK5-NEXT: store i32 [[ADD]], i32* [[TMP8]], align 4 // CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[_TMP4]], align 8 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK5-NEXT: store i32 [[INC]], i32* [[TMP9]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[TMP9]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK5-NEXT: store i32 [[INC]], i32* [[TMP10]], align 4 // CHECK5-NEXT: [[B6:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 2 // CHECK5-NEXT: [[BF_LOAD7:%.*]] = load i8, i8* [[B6]], align 8 // CHECK5-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD7]], 4 // CHECK5-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 4 // CHECK5-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32 // CHECK5-NEXT: [[DEC:%.*]] = add nsw i32 [[BF_CAST]], -1 -// CHECK5-NEXT: [[TMP11:%.*]] = trunc i32 [[DEC]] to i8 +// CHECK5-NEXT: [[TMP12:%.*]] = trunc i32 [[DEC]] to i8 // CHECK5-NEXT: [[BF_LOAD8:%.*]] = load i8, i8* [[B6]], align 8 -// CHECK5-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP11]], 15 +// CHECK5-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP12]], 15 // CHECK5-NEXT: [[BF_CLEAR9:%.*]] = and i8 [[BF_LOAD8]], -16 // CHECK5-NEXT: [[BF_SET10:%.*]] = or i8 [[BF_CLEAR9]], [[BF_VALUE]] // CHECK5-NEXT: store i8 [[BF_SET10]], i8* [[B6]], align 8 @@ -3536,16 +3653,16 @@ // CHECK5-NEXT: [[BF_RESULT_ASHR:%.*]] = ashr i8 [[BF_RESULT_SHL]], 4 // CHECK5-NEXT: [[BF_RESULT_CAST:%.*]] = sext i8 [[BF_RESULT_ASHR]] to i32 // CHECK5-NEXT: [[C11:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 3 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32*, i32** [[C11]], align 8 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP13]], 1 -// CHECK5-NEXT: store i32 [[DIV]], i32* [[TMP12]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[C11]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP14]], 1 +// CHECK5-NEXT: store i32 [[DIV]], i32* [[TMP13]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: @@ -3557,11 +3674,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK5-NEXT: [[E:%.*]] = alloca [4 x i8]*, align 8 // CHECK5-NEXT: [[A:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -3585,112 +3702,114 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[E1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 +// CHECK5-NEXT: [[E1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 1 // CHECK5-NEXT: store [4 x i8]* [[E1]], [4 x i8]** [[E]], align 8 -// CHECK5-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP2]], i32 0, i32 0 // CHECK5-NEXT: store i32* [[A2]], i32** [[A]], align 8 -// CHECK5-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP0]], i32 0, i32 3 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[C3]], align 8 -// CHECK5-NEXT: store i32* [[TMP1]], i32** [[C]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A]], align 8 -// CHECK5-NEXT: store i32* [[TMP2]], i32** [[TMP]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[C]], align 8 -// CHECK5-NEXT: store i32* [[TMP3]], i32** [[_TMP4]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load [4 x i8]*, [4 x i8]** [[E]], align 8 -// CHECK5-NEXT: store [4 x i8]* [[TMP4]], [4 x i8]** [[_TMP5]], align 8 +// CHECK5-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP2]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[C3]], align 8 +// CHECK5-NEXT: store i32* [[TMP3]], i32** [[C]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A]], align 8 +// CHECK5-NEXT: store i32* [[TMP4]], i32** [[TMP]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32*, i32** [[C]], align 8 +// CHECK5-NEXT: store i32* [[TMP5]], i32** [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = load [4 x i8]*, [4 x i8]** [[E]], align 8 +// CHECK5-NEXT: store [4 x i8]* [[TMP6]], [4 x i8]** [[_TMP5]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load [4 x i8]*, [4 x i8]** [[_TMP5]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = bitcast [4 x i8]* [[E7]] to i8* -// CHECK5-NEXT: [[TMP7:%.*]] = bitcast [4 x i8]* [[TMP5]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[TMP6]], i8* align 1 [[TMP7]], i64 4, i1 false) +// CHECK5-NEXT: [[TMP7:%.*]] = load [4 x i8]*, [4 x i8]** [[_TMP5]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = bitcast [4 x i8]* [[E7]] to i8* +// CHECK5-NEXT: [[TMP9:%.*]] = bitcast [4 x i8]* [[TMP7]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[TMP8]], i8* align 1 [[TMP9]], i64 4, i1 false) // CHECK5-NEXT: store [4 x i8]* [[E7]], [4 x i8]** [[_TMP8]], align 8 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]]) -// CHECK5-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]]) +// CHECK5-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP]], align 8 // CHECK5-NEXT: store i32* [[A9]], i32** [[_TMP10]], align 8 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP4]], align 8 // CHECK5-NEXT: store i32* [[C12]], i32** [[_TMP13]], align 8 -// CHECK5-NEXT: [[TMP12:%.*]] = load [4 x i8]*, [4 x i8]** [[_TMP5]], align 8 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load [4 x i8]*, [4 x i8]** [[_TMP5]], align 8 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP14:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP14:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK5-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32*, i32** [[_TMP10]], align 8 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK5-NEXT: store i32 [[INC]], i32* [[TMP19]], align 4 -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[B11]], align 4 -// CHECK5-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP21]], -1 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32*, i32** [[_TMP10]], align 8 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK5-NEXT: store i32 [[INC]], i32* [[TMP21]], align 4 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[B11]], align 4 +// CHECK5-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP23]], -1 // CHECK5-NEXT: store i32 [[DEC]], i32* [[B11]], align 4 -// CHECK5-NEXT: [[TMP22:%.*]] = load i32*, i32** [[_TMP13]], align 8 -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP23]], 1 -// CHECK5-NEXT: store i32 [[DIV]], i32* [[TMP22]], align 4 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32*, i32** [[_TMP13]], align 8 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP25]], 1 +// CHECK5-NEXT: store i32 [[DIV]], i32* [[TMP24]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP26]], 1 // CHECK5-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]]) -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK5-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK5-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP27:%.*]] = load i32*, i32** [[_TMP10]], align 8 -// CHECK5-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK5-NEXT: store i32 [[TMP28]], i32* [[TMP10]], align 4 -// CHECK5-NEXT: [[TMP29:%.*]] = load i32, i32* [[B11]], align 4 -// CHECK5-NEXT: store i32 [[TMP29]], i32* [[B]], align 4 -// CHECK5-NEXT: [[TMP30:%.*]] = load i32*, i32** [[_TMP13]], align 8 -// CHECK5-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 -// CHECK5-NEXT: store i32 [[TMP31]], i32* [[TMP11]], align 4 -// CHECK5-NEXT: [[TMP32:%.*]] = load [4 x i8]*, [4 x i8]** [[_TMP8]], align 8 -// CHECK5-NEXT: [[TMP33:%.*]] = load [4 x i8], [4 x i8]* [[TMP32]], align 1 -// CHECK5-NEXT: store [4 x i8] [[TMP33]], [4 x i8]* [[TMP12]], align 1 -// CHECK5-NEXT: [[TMP34:%.*]] = load i32, i32* [[B11]], align 4 -// CHECK5-NEXT: [[B16:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP0]], i32 0, i32 2 -// CHECK5-NEXT: [[TMP35:%.*]] = trunc i32 [[TMP34]] to i8 +// CHECK5-NEXT: [[TMP29:%.*]] = load i32*, i32** [[_TMP10]], align 8 +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK5-NEXT: store i32 [[TMP30]], i32* [[TMP12]], align 4 +// CHECK5-NEXT: [[TMP31:%.*]] = load i32, i32* [[B11]], align 4 +// CHECK5-NEXT: store i32 [[TMP31]], i32* [[B]], align 4 +// CHECK5-NEXT: [[TMP32:%.*]] = load i32*, i32** [[_TMP13]], align 8 +// CHECK5-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK5-NEXT: store i32 [[TMP33]], i32* [[TMP13]], align 4 +// CHECK5-NEXT: [[TMP34:%.*]] = load [4 x i8]*, [4 x i8]** [[_TMP8]], align 8 +// CHECK5-NEXT: [[TMP35:%.*]] = load [4 x i8], [4 x i8]* [[TMP34]], align 1 +// CHECK5-NEXT: store [4 x i8] [[TMP35]], [4 x i8]* [[TMP14]], align 1 +// CHECK5-NEXT: [[TMP36:%.*]] = load i32, i32* [[B11]], align 4 +// CHECK5-NEXT: [[B16:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP2]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP37:%.*]] = trunc i32 [[TMP36]] to i8 // CHECK5-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[B16]], align 8 -// CHECK5-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP35]], 15 +// CHECK5-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP37]], 15 // CHECK5-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -16 // CHECK5-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], [[BF_VALUE]] // CHECK5-NEXT: store i8 [[BF_SET]], i8* [[B16]], align 8 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]]) +// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]]) // CHECK5-NEXT: ret void // // @@ -3729,12 +3848,12 @@ // // // CHECK5-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK5-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK5-SAME: (%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK5-NEXT: entry: -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK5-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.4*, align 8 +// CHECK5-NEXT: store %struct.S.4* [[THIS]], %struct.S.4** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.4*, %struct.S.4** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK5-NEXT: ret void // // @@ -3749,185 +3868,184 @@ // // // CHECK5-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK5-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK5-SAME: (%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK5-NEXT: entry: -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.4*, align 8 // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.S.4* [[THIS]], %struct.S.4** [[THIS_ADDR]], align 8 // CHECK5-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.4*, %struct.S.4** [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK5-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK5-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK5-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 -// CHECK5-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 +// CHECK5-NEXT: [[TMP:%.*]] = alloca %struct.S.4*, align 8 +// CHECK5-NEXT: [[_TMP1:%.*]] = alloca %struct.S.4*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[T_VAR3:%.*]] = alloca i32, align 128 -// CHECK5-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 128 -// CHECK5-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK5-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 -// CHECK5-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 8 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 +// CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.4], align 128 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_4:%.*]], align 128 +// CHECK5-NEXT: [[_TMP3:%.*]] = alloca %struct.S.4*, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK5-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK5-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[_TMP1]], align 8 +// CHECK5-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.4]*, [2 x %struct.S.4]** [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load %struct.S.4*, %struct.S.4** [[TMP7]], align 8 +// CHECK5-NEXT: store %struct.S.4* [[TMP8]], %struct.S.4** [[TMP]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = load %struct.S.4*, %struct.S.4** [[TMP]], align 8 +// CHECK5-NEXT: store %struct.S.4* [[TMP9]], %struct.S.4** [[_TMP1]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.4], [2 x %struct.S.4]* [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_4]], %struct.S.4* [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: -// CHECK5-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK5-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.4* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_4]], %struct.S.4* [[ARRAYCTOR_CUR]], i64 1 +// CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.4* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP1]], align 8 -// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK5-NEXT: store %struct.S.0* [[VAR6]], %struct.S.0** [[_TMP7]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP10:%.*]] = load %struct.S.4*, %struct.S.4** [[_TMP1]], align 8 +// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: store %struct.S.4* [[VAR]], %struct.S.4** [[_TMP3]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR3]], align 128 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i64 0, i64 [[IDXPROM9]] -// CHECK5-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEaSERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX10]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP16]]) +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[T_VAR]], align 128 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK5-NEXT: store i32 [[TMP19]], i32* [[ARRAYIDX]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = load %struct.S.4*, %struct.S.4** [[_TMP3]], align 8 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.4], [2 x %struct.S.4]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK5-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.4* @_ZN1SIiEaSERKS0_(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX6]], %struct.S.4* noundef nonnull align 4 dereferenceable(4) [[TMP21]]) // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK5-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK5-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR3]], align 128 -// CHECK5-NEXT: store i32 [[TMP23]], i32* [[TMP0]], align 128 -// CHECK5-NEXT: [[TMP24:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK5-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP24]], i8* align 128 [[TMP25]], i64 8, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP2]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP26:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR5]] to %struct.S.0* -// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN12]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN12]], [[TMP27]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR]], align 128 +// CHECK5-NEXT: store i32 [[TMP28]], i32* [[TMP2]], align 128 +// CHECK5-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK5-NEXT: [[TMP30:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP29]], i8* align 128 [[TMP30]], i64 8, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.4], [2 x %struct.S.4]* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP31:%.*]] = bitcast [2 x %struct.S.4]* [[S_ARR]] to %struct.S.4* +// CHECK5-NEXT: [[TMP32:%.*]] = getelementptr [[STRUCT_S_4]], %struct.S.4* [[ARRAY_BEGIN8]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.4* [[ARRAY_BEGIN8]], [[TMP32]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP26]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[CALL13:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEaSERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done14: -// CHECK5-NEXT: [[TMP28:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK5-NEXT: [[CALL15:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEaSERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP5]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP28]]) +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.4* [ [[TMP31]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.4* [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[CALL9:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.4* @_ZN1SIiEaSERKS0_(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.4* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_4]], %struct.S.4* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_4]], %struct.S.4* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.4* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP32]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done10: +// CHECK5-NEXT: [[TMP33:%.*]] = load %struct.S.4*, %struct.S.4** [[_TMP3]], align 8 +// CHECK5-NEXT: [[CALL11:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.4* @_ZN1SIiEaSERKS0_(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[TMP10]], %struct.S.4* noundef nonnull align 4 dereferenceable(4) [[TMP33]]) // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR5]] -// CHECK5-NEXT: [[ARRAY_BEGIN16:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN16]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK5-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.4], [2 x %struct.S.4]* [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_4]], %struct.S.4* [[ARRAY_BEGIN12]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN16]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE17:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done17: -// CHECK5-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 -// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP31]]) +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.4* [ [[TMP34]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_4]], %struct.S.4* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.4* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done13: +// CHECK5-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP36]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK5-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK5-SAME: (%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK5-NEXT: entry: -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK5-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR5]] +// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.4*, align 8 +// CHECK5-NEXT: store %struct.S.4* [[THIS]], %struct.S.4** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.4*, %struct.S.4** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: call void @_ZN1SIiED2Ev(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR5]] // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK5-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK5-SAME: (%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK5-NEXT: entry: -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK5-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.4*, align 8 +// CHECK5-NEXT: store %struct.S.4* [[THIS]], %struct.S.4** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.4*, %struct.S.4** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_4:%.*]], %struct.S.4* [[THIS1]], i32 0, i32 0 // CHECK5-NEXT: store i32 0, i32* [[F]], align 4 // CHECK5-NEXT: ret void // @@ -3936,6 +4054,7 @@ // CHECK5-SAME: (%struct.SST* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 @@ -3950,7 +4069,9 @@ // CHECK5-NEXT: [[THIS1:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], %struct.SST* [[THIS1]], i32 0, i32 0 // CHECK5-NEXT: store i32 0, i32* [[A]], align 4 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SST*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.SST* [[THIS1]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store %struct.SST* [[THIS1]], %struct.SST** [[TMP1]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: store i32* [[TMP]], i32** [[_TMP2]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 @@ -3958,41 +4079,41 @@ // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK5-NEXT: store i32* [[A3]], i32** [[_TMP4]], align 8 // CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], 1 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP2]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[_TMP4]], align 8 -// CHECK5-NEXT: store i32 [[ADD]], i32* [[TMP7]], align 4 // CHECK5-NEXT: [[TMP8:%.*]] = load i32*, i32** [[_TMP4]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[INC]], i32* [[TMP8]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[TMP8]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: store i32 [[INC]], i32* [[TMP9]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK5-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: @@ -4004,11 +4125,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SST* noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK5-NEXT: [[A:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -4022,93 +4143,95 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], %struct.SST* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.SST*, %struct.SST** [[TMP1]], align 8 +// CHECK5-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], %struct.SST* [[TMP2]], i32 0, i32 0 // CHECK5-NEXT: store i32* [[A1]], i32** [[A]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A]], align 8 -// CHECK5-NEXT: store i32* [[TMP1]], i32** [[TMP]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[A]], align 8 +// CHECK5-NEXT: store i32* [[TMP3]], i32** [[TMP]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP]], align 8 // CHECK5-NEXT: store i32* [[A3]], i32** [[_TMP4]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[_TMP4]], align 8 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK5-NEXT: store i32 [[INC]], i32* [[TMP11]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK5-NEXT: store i32 [[INC]], i32* [[TMP13]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32*, i32** [[_TMP4]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 -// CHECK5-NEXT: store i32 [[TMP17]], i32* [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32*, i32** [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK5-NEXT: store i32 [[TMP19]], i32* [[TMP4]], align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) +// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK5-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK5-SAME: (%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK5-NEXT: entry: -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.4*, align 8 // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.S.4* [[THIS]], %struct.S.4** [[THIS_ADDR]], align 8 // CHECK5-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.4*, %struct.S.4** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_4:%.*]], %struct.S.4* [[THIS1]], i32 0, i32 0 // CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK5-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK5-SAME: (%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK5-NEXT: entry: -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK5-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.4*, align 8 +// CHECK5-NEXT: store %struct.S.4* [[THIS]], %struct.S.4** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.4*, %struct.S.4** [[THIS_ADDR]], align 8 // CHECK5-NEXT: ret void // diff --git a/clang/test/OpenMP/for_linear_codegen.cpp b/clang/test/OpenMP/for_linear_codegen.cpp --- a/clang/test/OpenMP/for_linear_codegen.cpp +++ b/clang/test/OpenMP/for_linear_codegen.cpp @@ -192,18 +192,23 @@ // CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK1-NEXT: [[PVAR:%.*]] = alloca float*, align 8 // CHECK1-NEXT: [[LVAR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN2SSC1ERi(%struct.SS* noundef nonnull align 8 dereferenceable(16) [[SS]], i32* noundef nonnull align 4 dereferenceable(4) @_ZZ4mainE5sivar) // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TEST]], i32 0, i32 0 // CHECK1-NEXT: store float* [[F]], float** [[PVAR]], align 8 // CHECK1-NEXT: store i64 0, i64* [[LVAR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, i64*)* @.omp_outlined. to void (i32*, i32*, ...)*), float** [[PVAR]], i64* [[LVAR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store float** [[PVAR]], float*** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[LVAR]], i64** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK1-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4:[0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP0]] +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP2]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2SSC1ERi @@ -230,12 +235,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[PVAR:%.*]], i64* noundef nonnull align 8 dereferenceable(8) [[LVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[PVAR_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[LVAR_ADDR:%.*]] = alloca i64*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTLINEAR_START:%.*]] = alloca float*, align 8 @@ -245,115 +249,122 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[PVAR2:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[PVAR:%.*]] = alloca float*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store float** [[PVAR]], float*** [[PVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64* [[LVAR]], i64** [[LVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load float**, float*** [[PVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64*, i64** [[LVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load float*, float** [[TMP0]], align 8 -// CHECK1-NEXT: store float* [[TMP2]], float** [[DOTLINEAR_START]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 8 -// CHECK1-NEXT: store i64 [[TMP3]], i64* [[DOTLINEAR_START1]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load float*, float** [[TMP2]], align 8 +// CHECK1-NEXT: store float* [[TMP5]], float** [[DOTLINEAR_START]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: store i64 [[TMP6]], i64* [[DOTLINEAR_START1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP5]]) -// CHECK1-NEXT: [[DOTLVAR__VOID_ADDR:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP5]], i64 8, i8* inttoptr (i64 5 to i8*)) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]]) +// CHECK1-NEXT: [[DOTLVAR__VOID_ADDR:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP8]], i64 8, i8* inttoptr (i64 5 to i8*)) // CHECK1-NEXT: [[DOTLVAR__ADDR:%.*]] = bitcast i8* [[DOTLVAR__VOID_ADDR]] to i64* -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load float*, float** [[DOTLINEAR_START]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[TMP13]], 3 -// CHECK1-NEXT: [[IDX_EXT:%.*]] = sext i32 [[MUL4]] to i64 -// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[TMP12]], i64 [[IDX_EXT]] -// CHECK1-NEXT: store float* [[ADD_PTR]], float** [[PVAR2]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTLINEAR_START1]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP15]], 3 -// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[MUL5]] to i64 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP14]], [[CONV]] -// CHECK1-NEXT: store i64 [[ADD6]], i64* [[DOTLVAR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load float*, float** [[PVAR2]], align 8 -// CHECK1-NEXT: [[ADD_PTR7:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 3 -// CHECK1-NEXT: store float* [[ADD_PTR7]], float** [[PVAR2]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTLVAR__ADDR]], align 8 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i64 [[TMP17]], 3 -// CHECK1-NEXT: store i64 [[ADD8]], i64* [[DOTLVAR__ADDR]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load float*, float** [[DOTLINEAR_START]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP16]], 3 +// CHECK1-NEXT: [[IDX_EXT:%.*]] = sext i32 [[MUL3]] to i64 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[TMP15]], i64 [[IDX_EXT]] +// CHECK1-NEXT: store float* [[ADD_PTR]], float** [[PVAR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTLINEAR_START1]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[TMP18]], 3 +// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[MUL4]] to i64 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i64 [[TMP17]], [[CONV]] +// CHECK1-NEXT: store i64 [[ADD5]], i64* [[DOTLVAR__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load float*, float** [[PVAR]], align 8 +// CHECK1-NEXT: [[ADD_PTR6:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 3 +// CHECK1-NEXT: store float* [[ADD_PTR6]], float** [[PVAR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTLVAR__ADDR]], align 8 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i64 [[TMP20]], 3 +// CHECK1-NEXT: store i64 [[ADD7]], i64* [[DOTLVAR__ADDR]], align 8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP19:%.*]] = bitcast i64* [[DOTLVAR__ADDR]] to i8* -// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP5]], i8* [[TMP19]], i8* inttoptr (i64 5 to i8*)) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP22:%.*]] = bitcast i64* [[DOTLVAR__ADDR]] to i8* +// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP8]], i8* [[TMP22]], i8* inttoptr (i64 5 to i8*)) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: -// CHECK1-NEXT: [[TMP22:%.*]] = load float*, float** [[PVAR2]], align 8 -// CHECK1-NEXT: store float* [[TMP22]], float** [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTLVAR__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[TMP23]], i64* [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load float*, float** [[PVAR]], align 8 +// CHECK1-NEXT: store float* [[TMP25]], float** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTLVAR__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[TMP26]], i64* [[TMP4]], align 8 // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_Z5tmainIiET_v // CHECK1-SAME: () #[[ATTR5:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[SST:%.*]] = alloca [[STRUCT_SST:%.*]], align 4 // CHECK1-NEXT: [[PVAR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[LVAR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: call void @_ZN3SSTIiEC1Ev(%struct.SST* noundef nonnull align 4 dereferenceable(4) [[SST]]) -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[TEST]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[TEST]], i32 0, i32 0 // CHECK1-NEXT: store i32* [[F]], i32** [[PVAR]], align 8 -// CHECK1-NEXT: [[F1:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[TEST]], i32 0, i32 0 +// CHECK1-NEXT: [[F1:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[TEST]], i32 0, i32 0 // CHECK1-NEXT: store i32* [[F1]], i32** [[LVAR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[LVAR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32**, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32** [[PVAR]], i32* [[TMP0]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32** [[PVAR]], i32*** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[LVAR]], align 8 +// CHECK1-NEXT: store i32* [[TMP2]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK1-NEXT: ret i32 0 // // @@ -372,6 +383,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32* [[D]], i32** [[D_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 @@ -385,16 +397,18 @@ // CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 2 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[D_ADDR]], align 8 // CHECK1-NEXT: store i32* [[TMP0]], i32** [[C]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.SS* [[THIS1]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[THIS1]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[A:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[C:%.*]] = alloca i32*, align 8 @@ -419,124 +433,126 @@ // CHECK1-NEXT: [[_TMP21:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store i32* [[A1]], i32** [[A]], align 8 -// CHECK1-NEXT: [[C2:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP0]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[C2]], align 8 -// CHECK1-NEXT: store i32* [[TMP1]], i32** [[C]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A]], align 8 -// CHECK1-NEXT: store i32* [[TMP2]], i32** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[C]], align 8 -// CHECK1-NEXT: store i32* [[TMP3]], i32** [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTLINEAR_START]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[B]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTLINEAR_START5]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTLINEAR_START6]], align 4 +// CHECK1-NEXT: [[C2:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP2]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[C2]], align 8 +// CHECK1-NEXT: store i32* [[TMP3]], i32** [[C]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A]], align 8 +// CHECK1-NEXT: store i32* [[TMP4]], i32** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[C]], align 8 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTLINEAR_START]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[B]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTLINEAR_START5]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTLINEAR_START6]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) // CHECK1-NEXT: store i32* [[A7]], i32** [[_TMP8]], align 8 // CHECK1-NEXT: store i32* [[C10]], i32** [[_TMP11]], align 8 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP17]], [[MUL13]] -// CHECK1-NEXT: store i32 [[ADD14]], i32* [[A7]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTLINEAR_START5]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL15:%.*]] = mul nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP19]], [[MUL15]] -// CHECK1-NEXT: store i32 [[ADD16]], i32* [[B9]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTLINEAR_START6]], align 4 +// CHECK1-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP19]], [[MUL13]] +// CHECK1-NEXT: store i32 [[ADD14]], i32* [[A7]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTLINEAR_START5]], align 4 // CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP21]], [[MUL17]] +// CHECK1-NEXT: [[MUL15:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP21]], [[MUL15]] +// CHECK1-NEXT: store i32 [[ADD16]], i32* [[B9]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTLINEAR_START6]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], [[MUL17]] // CHECK1-NEXT: store i32 [[ADD18]], i32* [[C10]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[_TMP8]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP23]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[B9]], align 4 -// CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP25]], -1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[_TMP8]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP25]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[B9]], align 4 +// CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP27]], -1 // CHECK1-NEXT: store i32 [[DEC]], i32* [[B9]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32*, i32** [[_TMP11]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP27]], 1 -// CHECK1-NEXT: store i32 [[DIV]], i32* [[TMP26]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32*, i32** [[_TMP11]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP29]], 1 +// CHECK1-NEXT: store i32 [[DIV]], i32* [[TMP28]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK1-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]]) -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK1-NEXT: br i1 [[TMP30]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK1-NEXT: store i32* [[TMP31]], i32** [[_TMP20]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[A7]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32*, i32** [[_TMP20]], align 8 -// CHECK1-NEXT: store i32 [[TMP32]], i32* [[TMP33]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[B9]], align 4 -// CHECK1-NEXT: store i32 [[TMP34]], i32* [[B]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK1-NEXT: store i32* [[TMP35]], i32** [[_TMP21]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[C10]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32*, i32** [[_TMP21]], align 8 -// CHECK1-NEXT: store i32 [[TMP36]], i32* [[TMP37]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[B]], align 4 -// CHECK1-NEXT: [[B22:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP0]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP39:%.*]] = trunc i32 [[TMP38]] to i8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK1-NEXT: store i32* [[TMP33]], i32** [[_TMP20]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[A7]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32*, i32** [[_TMP20]], align 8 +// CHECK1-NEXT: store i32 [[TMP34]], i32* [[TMP35]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[B9]], align 4 +// CHECK1-NEXT: store i32 [[TMP36]], i32* [[B]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK1-NEXT: store i32* [[TMP37]], i32** [[_TMP21]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[C10]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32*, i32** [[_TMP21]], align 8 +// CHECK1-NEXT: store i32 [[TMP38]], i32* [[TMP39]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[B]], align 4 +// CHECK1-NEXT: [[B22:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP2]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP41:%.*]] = trunc i32 [[TMP40]] to i8 // CHECK1-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[B22]], align 4 -// CHECK1-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP39]], 15 +// CHECK1-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP41]], 15 // CHECK1-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -16 // CHECK1-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], [[BF_VALUE]] // CHECK1-NEXT: store i8 [[BF_SET]], i8* [[B22]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) // CHECK1-NEXT: ret void // // @@ -552,12 +568,12 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK1-NEXT: ret void // // @@ -572,12 +588,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[PVAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[LVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[PVAR_ADDR:%.*]] = alloca i32**, align 8 -// CHECK1-NEXT: [[LVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -589,121 +604,123 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[PVAR4:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[LVAR5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[_TMP12:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[PVAR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[LVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[_TMP10:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32** [[PVAR]], i32*** [[PVAR_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[LVAR]], i32** [[LVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32**, i32*** [[PVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[LVAR_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[TMP1]], i32** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK1-NEXT: store i32* [[TMP2]], i32** [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP0]], align 8 -// CHECK1-NEXT: store i32* [[TMP3]], i32** [[DOTLINEAR_START]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTLINEAR_START3]], align 4 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK1-NEXT: store i32* [[TMP4]], i32** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP2]], align 8 +// CHECK1-NEXT: store i32* [[TMP6]], i32** [[DOTLINEAR_START]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTLINEAR_START3]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) -// CHECK1-NEXT: store i32* [[LVAR5]], i32** [[_TMP6]], align 8 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: store i32* [[LVAR]], i32** [[_TMP4]], align 8 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTLINEAR_START]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP15]], 1 -// CHECK1-NEXT: [[IDX_EXT:%.*]] = sext i32 [[MUL8]] to i64 -// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[TMP14]], i64 [[IDX_EXT]] -// CHECK1-NEXT: store i32* [[ADD_PTR]], i32** [[PVAR4]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTLINEAR_START3]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP17]], 1 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP16]], [[MUL9]] -// CHECK1-NEXT: store i32 [[ADD10]], i32* [[LVAR5]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[PVAR4]], align 8 -// CHECK1-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[TMP18]], i32 1 -// CHECK1-NEXT: store i32* [[INCDEC_PTR]], i32** [[PVAR4]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[_TMP6]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP19]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTLINEAR_START]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[IDX_EXT:%.*]] = sext i32 [[MUL6]] to i64 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i64 [[IDX_EXT]] +// CHECK1-NEXT: store i32* [[ADD_PTR]], i32** [[PVAR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTLINEAR_START3]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[MUL7]] +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[LVAR]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32*, i32** [[PVAR]], align 8 +// CHECK1-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[TMP21]], i32 1 +// CHECK1-NEXT: store i32* [[INCDEC_PTR]], i32** [[PVAR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32*, i32** [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP22]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]]) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32*, i32** [[PVAR4]], align 8 -// CHECK1-NEXT: store i32* [[TMP24]], i32** [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[_TMP1]], align 8 -// CHECK1-NEXT: store i32* [[TMP25]], i32** [[_TMP12]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[LVAR5]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[_TMP12]], align 8 -// CHECK1-NEXT: store i32 [[TMP26]], i32* [[TMP27]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[PVAR]], align 8 +// CHECK1-NEXT: store i32* [[TMP27]], i32** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32*, i32** [[_TMP1]], align 8 +// CHECK1-NEXT: store i32* [[TMP28]], i32** [[_TMP10]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[LVAR]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32*, i32** [[_TMP10]], align 8 +// CHECK1-NEXT: store i32 [[TMP29]], i32* [[TMP30]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, i32* [[F]], align 4 // CHECK1-NEXT: ret void // @@ -712,20 +729,23 @@ // CHECK1-SAME: (%struct.SST* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], %struct.SST* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, i32* [[A]], align 4 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SST*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.SST* [[THIS1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SST* [[THIS1]], %struct.SST** [[TMP0]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SST* noundef [[THIS:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[A:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -741,90 +761,92 @@ // CHECK1-NEXT: [[_TMP9:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], %struct.SST* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SST*, %struct.SST** [[TMP1]], align 8 +// CHECK1-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], %struct.SST* [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store i32* [[A1]], i32** [[A]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A]], align 8 -// CHECK1-NEXT: store i32* [[TMP1]], i32** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[DOTLINEAR_START]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[A]], align 8 +// CHECK1-NEXT: store i32* [[TMP3]], i32** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTLINEAR_START]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) // CHECK1-NEXT: store i32* [[A3]], i32** [[_TMP4]], align 8 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP12]], [[MUL6]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], [[MUL6]] // CHECK1-NEXT: store i32 [[ADD7]], i32* [[A3]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP14]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP16]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK1-NEXT: store i32* [[TMP19]], i32** [[_TMP9]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[A3]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32*, i32** [[_TMP9]], align 8 -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[TMP21]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK1-NEXT: store i32* [[TMP21]], i32** [[_TMP9]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[A3]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[_TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP22]], i32* [[TMP23]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -867,6 +889,7 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK3-NEXT: store i32* [[D]], i32** [[D_ADDR]], align 8 // CHECK3-NEXT: [[THIS1:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 @@ -880,16 +903,18 @@ // CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 2 // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[D_ADDR]], align 8 // CHECK3-NEXT: store i32* [[TMP0]], i32** [[C]], align 8 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[THIS1]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[THIS1]], %struct.SS** [[TMP1]], align 8 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK3-NEXT: [[A:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C:%.*]] = alloca i32*, align 8 @@ -915,124 +940,126 @@ // CHECK3-NEXT: [[_TMP21:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 +// CHECK3-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store i32* [[A1]], i32** [[A]], align 8 -// CHECK3-NEXT: [[C2:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[C2]], align 8 -// CHECK3-NEXT: store i32* [[TMP1]], i32** [[C]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A]], align 8 -// CHECK3-NEXT: store i32* [[TMP2]], i32** [[TMP]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[C]], align 8 -// CHECK3-NEXT: store i32* [[TMP3]], i32** [[_TMP3]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTLINEAR_START]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[B]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTLINEAR_START5]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTLINEAR_START6]], align 4 +// CHECK3-NEXT: [[C2:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP2]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[C2]], align 8 +// CHECK3-NEXT: store i32* [[TMP3]], i32** [[C]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A]], align 8 +// CHECK3-NEXT: store i32* [[TMP4]], i32** [[TMP]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[C]], align 8 +// CHECK3-NEXT: store i32* [[TMP5]], i32** [[_TMP3]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[B]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTLINEAR_START5]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTLINEAR_START6]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]]) // CHECK3-NEXT: store i32* [[A7]], i32** [[_TMP8]], align 8 // CHECK3-NEXT: store i32* [[C10]], i32** [[_TMP11]], align 8 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP17]], [[MUL13]] -// CHECK3-NEXT: store i32 [[ADD14]], i32* [[A7]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTLINEAR_START5]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 // CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL15:%.*]] = mul nsw i32 [[TMP20]], 1 -// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP19]], [[MUL15]] -// CHECK3-NEXT: store i32 [[ADD16]], i32* [[B9]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTLINEAR_START6]], align 4 +// CHECK3-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP19]], [[MUL13]] +// CHECK3-NEXT: store i32 [[ADD14]], i32* [[A7]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTLINEAR_START5]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP22]], 1 -// CHECK3-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP21]], [[MUL17]] +// CHECK3-NEXT: [[MUL15:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP21]], [[MUL15]] +// CHECK3-NEXT: store i32 [[ADD16]], i32* [[B9]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTLINEAR_START6]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP24]], 1 +// CHECK3-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], [[MUL17]] // CHECK3-NEXT: store i32 [[ADD18]], i32* [[C10]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP23]], align 8 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32*, i32** [[_TMP8]], align 8 -// CHECK3-NEXT: store i32* [[TMP25]], i32** [[TMP24]], align 8 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store i32* [[B9]], i32** [[TMP26]], align 8 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32*, i32** [[_TMP11]], align 8 -// CHECK3-NEXT: store i32* [[TMP28]], i32** [[TMP27]], align 8 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP25]], align 8 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32*, i32** [[_TMP8]], align 8 +// CHECK3-NEXT: store i32* [[TMP27]], i32** [[TMP26]], align 8 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[B9]], i32** [[TMP28]], align 8 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32*, i32** [[_TMP11]], align 8 +// CHECK3-NEXT: store i32* [[TMP30]], i32** [[TMP29]], align 8 // CHECK3-NEXT: call void @_ZZN2SSC1ERiENKUlvE_clEv(%class.anon.0* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP31]], 1 // CHECK3-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]]) -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK3-NEXT: br i1 [[TMP31]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK3-NEXT: br i1 [[TMP33]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK3: .omp.linear.pu: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK3-NEXT: store i32* [[TMP32]], i32** [[_TMP20]], align 8 -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[A7]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32*, i32** [[_TMP20]], align 8 -// CHECK3-NEXT: store i32 [[TMP33]], i32* [[TMP34]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[B9]], align 4 -// CHECK3-NEXT: store i32 [[TMP35]], i32* [[B]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK3-NEXT: store i32* [[TMP36]], i32** [[_TMP21]], align 8 -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[C10]], align 4 -// CHECK3-NEXT: [[TMP38:%.*]] = load i32*, i32** [[_TMP21]], align 8 -// CHECK3-NEXT: store i32 [[TMP37]], i32* [[TMP38]], align 4 -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[B]], align 4 -// CHECK3-NEXT: [[B22:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP0]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP40:%.*]] = trunc i32 [[TMP39]] to i8 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK3-NEXT: store i32* [[TMP34]], i32** [[_TMP20]], align 8 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[A7]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32*, i32** [[_TMP20]], align 8 +// CHECK3-NEXT: store i32 [[TMP35]], i32* [[TMP36]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[B9]], align 4 +// CHECK3-NEXT: store i32 [[TMP37]], i32* [[B]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK3-NEXT: store i32* [[TMP38]], i32** [[_TMP21]], align 8 +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[C10]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = load i32*, i32** [[_TMP21]], align 8 +// CHECK3-NEXT: store i32 [[TMP39]], i32* [[TMP40]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[B]], align 4 +// CHECK3-NEXT: [[B22:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP2]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP42:%.*]] = trunc i32 [[TMP41]] to i8 // CHECK3-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[B22]], align 4 -// CHECK3-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP40]], 15 +// CHECK3-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP42]], 15 // CHECK3-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -16 // CHECK3-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], [[BF_VALUE]] // CHECK3-NEXT: store i8 [[BF_SET]], i8* [[B22]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK3: .omp.linear.pu.done: -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) // CHECK3-NEXT: ret void // // @@ -1040,6 +1067,7 @@ // CHECK3-SAME: (%class.anon.0* noundef nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR2:[0-9]+]] align 2 { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon.0*, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK3-NEXT: store %class.anon.0* [[THIS]], %class.anon.0** [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[THIS1:%.*]] = load %class.anon.0*, %class.anon.0** [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], %class.anon.0* [[THIS1]], i32 0, i32 0 @@ -1059,25 +1087,30 @@ // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[DIV]], i32* [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP11]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP1]], %struct.SS** [[TMP11]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 1 // CHECK3-NEXT: [[TMP14:%.*]] = load i32*, i32** [[TMP13]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[TMP15]], align 8 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.SS* [[TMP1]], i32* [[TMP12]], i32* [[TMP14]], i32* [[TMP16]]) +// CHECK3-NEXT: store i32* [[TMP14]], i32** [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[TMP16]], align 8 +// CHECK3-NEXT: store i32* [[TMP17]], i32** [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP19]], align 8 +// CHECK3-NEXT: store i32* [[TMP20]], i32** [[TMP18]], align 8 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 @@ -1092,136 +1125,139 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A7:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP7:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP8:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[B9:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C10:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP11:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[_TMP20:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[_TMP21:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[_TMP17:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[_TMP18:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK3-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK3-NEXT: store i32* [[B]], i32** [[B_ADDR]], align 8 -// CHECK3-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[B_ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[C_ADDR]], align 8 -// CHECK3-NEXT: store i32* [[TMP1]], i32** [[TMP]], align 8 -// CHECK3-NEXT: store i32* [[TMP3]], i32** [[_TMP1]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK3-NEXT: store i32* [[TMP4]], i32** [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[_TMP1]], align 8 -// CHECK3-NEXT: store i32* [[TMP5]], i32** [[_TMP3]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTLINEAR_START]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTLINEAR_START5]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTLINEAR_START6]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 +// CHECK3-NEXT: store i32* [[TMP4]], i32** [[TMP]], align 8 +// CHECK3-NEXT: store i32* [[TMP8]], i32** [[_TMP1]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK3-NEXT: store i32* [[TMP9]], i32** [[_TMP2]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP1]], align 8 +// CHECK3-NEXT: store i32* [[TMP10]], i32** [[_TMP3]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTLINEAR_START5]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTLINEAR_START6]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) -// CHECK3-NEXT: store i32* [[A7]], i32** [[_TMP8]], align 8 -// CHECK3-NEXT: store i32* [[C10]], i32** [[_TMP11]], align 8 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: store i32* [[A]], i32** [[_TMP7]], align 8 +// CHECK3-NEXT: store i32* [[C]], i32** [[_TMP8]], align 8 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP20]], 1 -// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP19]], [[MUL13]] -// CHECK3-NEXT: store i32 [[ADD14]], i32* [[A7]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTLINEAR_START5]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL15:%.*]] = mul nsw i32 [[TMP22]], 1 -// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP21]], [[MUL15]] -// CHECK3-NEXT: store i32 [[ADD16]], i32* [[B9]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTLINEAR_START6]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP24]], 1 -// CHECK3-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], [[MUL17]] -// CHECK3-NEXT: store i32 [[ADD18]], i32* [[C10]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32*, i32** [[_TMP8]], align 8 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP26]], 1 -// CHECK3-NEXT: store i32 [[INC]], i32* [[TMP25]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[B9]], align 4 -// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP27]], -1 -// CHECK3-NEXT: store i32 [[DEC]], i32* [[B9]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32*, i32** [[_TMP11]], align 8 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP29]], 1 -// CHECK3-NEXT: store i32 [[DIV]], i32* [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP25]], 1 +// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP24]], [[MUL10]] +// CHECK3-NEXT: store i32 [[ADD11]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTLINEAR_START5]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP27]], 1 +// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP26]], [[MUL12]] +// CHECK3-NEXT: store i32 [[ADD13]], i32* [[B]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTLINEAR_START6]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL14:%.*]] = mul nsw i32 [[TMP29]], 1 +// CHECK3-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP28]], [[MUL14]] +// CHECK3-NEXT: store i32 [[ADD15]], i32* [[C]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32*, i32** [[_TMP7]], align 8 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK3-NEXT: store i32 [[INC]], i32* [[TMP30]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[B]], align 4 +// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP32]], -1 +// CHECK3-NEXT: store i32 [[DEC]], i32* [[B]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32*, i32** [[_TMP8]], align 8 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP34]], 1 +// CHECK3-NEXT: store i32 [[DIV]], i32* [[TMP33]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP30]], 1 -// CHECK3-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK3-NEXT: store i32 [[ADD16]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]]) -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK3-NEXT: br i1 [[TMP32]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK3-NEXT: br i1 [[TMP37]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK3: .omp.linear.pu: -// CHECK3-NEXT: [[TMP33:%.*]] = load i32*, i32** [[_TMP2]], align 8 -// CHECK3-NEXT: store i32* [[TMP33]], i32** [[_TMP20]], align 8 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[A7]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = load i32*, i32** [[_TMP20]], align 8 -// CHECK3-NEXT: store i32 [[TMP34]], i32* [[TMP35]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[B9]], align 4 -// CHECK3-NEXT: store i32 [[TMP36]], i32* [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP37:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK3-NEXT: store i32* [[TMP37]], i32** [[_TMP21]], align 8 -// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[C10]], align 4 -// CHECK3-NEXT: [[TMP39:%.*]] = load i32*, i32** [[_TMP21]], align 8 -// CHECK3-NEXT: store i32 [[TMP38]], i32* [[TMP39]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK3-NEXT: store i32* [[TMP38]], i32** [[_TMP17]], align 8 +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = load i32*, i32** [[_TMP17]], align 8 +// CHECK3-NEXT: store i32 [[TMP39]], i32* [[TMP40]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[B]], align 4 +// CHECK3-NEXT: store i32 [[TMP41]], i32* [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK3-NEXT: store i32* [[TMP42]], i32** [[_TMP18]], align 8 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, i32* [[C]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = load i32*, i32** [[_TMP18]], align 8 +// CHECK3-NEXT: store i32 [[TMP43]], i32* [[TMP44]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK3: .omp.linear.pu.done: -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1235,98 +1271,100 @@ // CHECK3-NEXT: [[G:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[G1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP3:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_3:%.*]], align 8 // CHECK3-NEXT: [[_TMP12:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** @g1, align 8 -// CHECK3-NEXT: store i32* [[TMP0]], i32** [[TMP]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* @g, align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** @g1, align 8 +// CHECK3-NEXT: store i32* [[TMP1]], i32** [[TMP]], align 8 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* @g, align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTLINEAR_START2]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* @g, align 4 +// CHECK3-NEXT: store i32 [[TMP3]], i32* [[DOTLINEAR_START2]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) // CHECK3-NEXT: store i32* [[G1]], i32** [[_TMP3]], align 8 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP12]], 5 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], [[MUL5]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP13]], 5 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], [[MUL5]] // CHECK3-NEXT: store i32 [[ADD6]], i32* [[G]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START2]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP14]], 5 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], [[MUL7]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTLINEAR_START2]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP15]], 5 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP14]], [[MUL7]] // CHECK3-NEXT: store i32 [[ADD8]], i32* [[G1]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[G]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 5 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[G]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP16]], 5 // CHECK3-NEXT: store i32 [[ADD9]], i32* [[G]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK3-NEXT: [[TMP17:%.*]] = load volatile i32, i32* [[TMP16]], align 4 -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], 5 -// CHECK3-NEXT: store volatile i32 [[ADD10]], i32* [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store i32* [[G]], i32** [[TMP18]], align 8 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK3-NEXT: store i32* [[TMP20]], i32** [[TMP19]], align 8 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load volatile i32, i32* [[TMP17]], align 4 +// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], 5 +// CHECK3-NEXT: store volatile i32 [[ADD10]], i32* [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[G]], i32** [[TMP19]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK3-NEXT: store i32* [[TMP21]], i32** [[TMP20]], align 8 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.3* noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK3-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]]) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK3: .omp.linear.pu: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[G]], align 4 -// CHECK3-NEXT: store i32 [[TMP24]], i32* @g, align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32*, i32** @g1, align 8 -// CHECK3-NEXT: store i32* [[TMP25]], i32** [[_TMP12]], align 8 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[G1]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32*, i32** [[_TMP12]], align 8 -// CHECK3-NEXT: store volatile i32 [[TMP26]], i32* [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[G]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], i32* @g, align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32*, i32** @g1, align 8 +// CHECK3-NEXT: store i32* [[TMP26]], i32** [[_TMP12]], align 8 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[G1]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32*, i32** [[_TMP12]], align 8 +// CHECK3-NEXT: store volatile i32 [[TMP27]], i32* [[TMP28]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK3: .omp.linear.pu.done: -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) // CHECK3-NEXT: ret void // // @@ -1361,18 +1399,20 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>*, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* // CHECK4-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>** [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1390,65 +1430,67 @@ // CHECK4-NEXT: [[_TMP13:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** @g1, align 8 -// CHECK4-NEXT: store i32* [[TMP0]], i32** [[TMP]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* @g, align 4 -// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START]], align 4 +// CHECK4-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** @g1, align 8 +// CHECK4-NEXT: store i32* [[TMP1]], i32** [[TMP]], align 8 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* @g, align 4 -// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTLINEAR_START2]], align 4 +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTLINEAR_START]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* @g, align 4 +// CHECK4-NEXT: store i32 [[TMP3]], i32* [[DOTLINEAR_START2]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]]) +// CHECK4-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP5]]) // CHECK4-NEXT: store i32* [[G1]], i32** [[_TMP3]], align 8 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK4-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP12]], 5 -// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], [[MUL5]] +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP13]], 5 +// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], [[MUL5]] // CHECK4-NEXT: store i32 [[ADD6]], i32* [[G]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START2]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP14]], 5 -// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], [[MUL7]] +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTLINEAR_START2]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP15]], 5 +// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP14]], [[MUL7]] // CHECK4-NEXT: store i32 [[ADD8]], i32* [[G1]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[G]], align 4 -// CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 5 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[G]], align 4 +// CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP16]], 5 // CHECK4-NEXT: store i32 [[ADD9]], i32* [[G]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK4-NEXT: [[TMP17:%.*]] = load volatile i32, i32* [[TMP16]], align 4 -// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], 5 -// CHECK4-NEXT: store volatile i32 [[ADD10]], i32* [[TMP16]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK4-NEXT: [[TMP18:%.*]] = load volatile i32, i32* [[TMP17]], align 4 +// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], 5 +// CHECK4-NEXT: store volatile i32 [[ADD10]], i32* [[TMP17]], align 4 // CHECK4-NEXT: store i32 1, i32* [[G]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK4-NEXT: store volatile i32 5, i32* [[TMP18]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK4-NEXT: store volatile i32 5, i32* [[TMP19]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32 }>* [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** [[BLOCK_ISA]], align 8 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32 }>* [[BLOCK]], i32 0, i32 1 @@ -1460,44 +1502,44 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32 }>* [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp.1 to %struct.__block_descriptor*), %struct.__block_descriptor** [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32 }>* [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP19:%.*]] = load volatile i32, i32* [[G]], align 4 -// CHECK4-NEXT: store volatile i32 [[TMP19]], i32* [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[TMP20:%.*]] = load volatile i32, i32* [[G]], align 4 +// CHECK4-NEXT: store volatile i32 [[TMP20]], i32* [[BLOCK_CAPTURED]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED11:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32 }>* [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP20:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK4-NEXT: store i32* [[TMP20]], i32** [[BLOCK_CAPTURED11]], align 8 -// CHECK4-NEXT: [[TMP21:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32 }>* [[BLOCK]] to void ()* -// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP21]] to %struct.__block_literal_generic* -// CHECK4-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP23:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* -// CHECK4-NEXT: [[TMP24:%.*]] = load i8*, i8** [[TMP22]], align 8 -// CHECK4-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP24]] to void (i8*)* -// CHECK4-NEXT: call void [[TMP25]](i8* noundef [[TMP23]]) +// CHECK4-NEXT: [[TMP21:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK4-NEXT: store i32* [[TMP21]], i32** [[BLOCK_CAPTURED11]], align 8 +// CHECK4-NEXT: [[TMP22:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32*, i32 }>* [[BLOCK]] to void ()* +// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP22]] to %struct.__block_literal_generic* +// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP24:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* +// CHECK4-NEXT: [[TMP25:%.*]] = load i8*, i8** [[TMP23]], align 8 +// CHECK4-NEXT: [[TMP26:%.*]] = bitcast i8* [[TMP25]] to void (i8*)* +// CHECK4-NEXT: call void [[TMP26]](i8* noundef [[TMP24]]) // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK4-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) -// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK4-NEXT: br i1 [[TMP28]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]]) +// CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK4-NEXT: br i1 [[TMP29]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK4: .omp.linear.pu: -// CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[G]], align 4 -// CHECK4-NEXT: store i32 [[TMP29]], i32* @g, align 4 -// CHECK4-NEXT: [[TMP30:%.*]] = load i32*, i32** @g1, align 8 -// CHECK4-NEXT: store i32* [[TMP30]], i32** [[_TMP13]], align 8 -// CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[G1]], align 4 -// CHECK4-NEXT: [[TMP32:%.*]] = load i32*, i32** [[_TMP13]], align 8 -// CHECK4-NEXT: store volatile i32 [[TMP31]], i32* [[TMP32]], align 4 +// CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[G]], align 4 +// CHECK4-NEXT: store i32 [[TMP30]], i32* @g, align 4 +// CHECK4-NEXT: [[TMP31:%.*]] = load i32*, i32** @g1, align 8 +// CHECK4-NEXT: store i32* [[TMP31]], i32** [[_TMP13]], align 8 +// CHECK4-NEXT: [[TMP32:%.*]] = load i32, i32* [[G1]], align 4 +// CHECK4-NEXT: [[TMP33:%.*]] = load i32*, i32** [[_TMP13]], align 8 +// CHECK4-NEXT: store volatile i32 [[TMP32]], i32* [[TMP33]], align 4 // CHECK4-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK4: .omp.linear.pu.done: -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) // CHECK4-NEXT: ret void // // @@ -1522,6 +1564,7 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 // CHECK4-NEXT: [[D_ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK4-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK4-NEXT: store i32* [[D]], i32** [[D_ADDR]], align 8 // CHECK4-NEXT: [[THIS1:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 @@ -1535,16 +1578,18 @@ // CHECK4-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 2 // CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[D_ADDR]], align 8 // CHECK4-NEXT: store i32* [[TMP0]], i32** [[C]], align 8 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.SS* [[THIS1]]) +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store %struct.SS* [[THIS1]], %struct.SS** [[TMP1]], align 8 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK4-NEXT: [[A:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[C:%.*]] = alloca i32*, align 8 @@ -1570,73 +1615,75 @@ // CHECK4-NEXT: [[_TMP23:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 +// CHECK4-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 // CHECK4-NEXT: store i32* [[A1]], i32** [[A]], align 8 -// CHECK4-NEXT: [[C2:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP0]], i32 0, i32 2 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[C2]], align 8 -// CHECK4-NEXT: store i32* [[TMP1]], i32** [[C]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A]], align 8 -// CHECK4-NEXT: store i32* [[TMP2]], i32** [[TMP]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32*, i32** [[C]], align 8 -// CHECK4-NEXT: store i32* [[TMP3]], i32** [[_TMP3]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTLINEAR_START]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[B]], align 4 -// CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTLINEAR_START5]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK4-NEXT: store i32 [[TMP8]], i32* [[DOTLINEAR_START6]], align 4 +// CHECK4-NEXT: [[C2:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP2]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32*, i32** [[C2]], align 8 +// CHECK4-NEXT: store i32* [[TMP3]], i32** [[C]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A]], align 8 +// CHECK4-NEXT: store i32* [[TMP4]], i32** [[TMP]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[C]], align 8 +// CHECK4-NEXT: store i32* [[TMP5]], i32** [[_TMP3]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTLINEAR_START]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[B]], align 4 +// CHECK4-NEXT: store i32 [[TMP8]], i32* [[DOTLINEAR_START5]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK4-NEXT: store i32 [[TMP10]], i32* [[DOTLINEAR_START6]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) // CHECK4-NEXT: store i32* [[A7]], i32** [[_TMP8]], align 8 // CHECK4-NEXT: store i32* [[C10]], i32** [[_TMP11]], align 8 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK4-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK4-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP17]], [[MUL13]] -// CHECK4-NEXT: store i32 [[ADD14]], i32* [[A7]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTLINEAR_START5]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 // CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL15:%.*]] = mul nsw i32 [[TMP20]], 1 -// CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP19]], [[MUL15]] -// CHECK4-NEXT: store i32 [[ADD16]], i32* [[B9]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTLINEAR_START6]], align 4 +// CHECK4-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK4-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP19]], [[MUL13]] +// CHECK4-NEXT: store i32 [[ADD14]], i32* [[A7]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTLINEAR_START5]], align 4 // CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP22]], 1 -// CHECK4-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP21]], [[MUL17]] +// CHECK4-NEXT: [[MUL15:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP21]], [[MUL15]] +// CHECK4-NEXT: store i32 [[ADD16]], i32* [[B9]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTLINEAR_START6]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP24]], 1 +// CHECK4-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], [[MUL17]] // CHECK4-NEXT: store i32 [[ADD18]], i32* [[C10]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** [[BLOCK_ISA]], align 8 @@ -1649,62 +1696,62 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp.4 to %struct.__block_descriptor*), %struct.__block_descriptor** [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED_THIS_ADDR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[BLOCK_CAPTURED_THIS_ADDR]], align 8 +// CHECK4-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[BLOCK_CAPTURED_THIS_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP23:%.*]] = load i32*, i32** [[_TMP8]], align 8 -// CHECK4-NEXT: store i32* [[TMP23]], i32** [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32*, i32** [[_TMP8]], align 8 +// CHECK4-NEXT: store i32* [[TMP25]], i32** [[BLOCK_CAPTURED]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED19:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 8 -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[B9]], align 4 -// CHECK4-NEXT: store i32 [[TMP24]], i32* [[BLOCK_CAPTURED19]], align 8 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[B9]], align 4 +// CHECK4-NEXT: store i32 [[TMP26]], i32* [[BLOCK_CAPTURED19]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED20:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP25:%.*]] = load i32*, i32** [[_TMP11]], align 8 -// CHECK4-NEXT: store i32* [[TMP25]], i32** [[BLOCK_CAPTURED20]], align 8 -// CHECK4-NEXT: [[TMP26:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]] to void ()* -// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP26]] to %struct.__block_literal_generic* -// CHECK4-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP28:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* -// CHECK4-NEXT: [[TMP29:%.*]] = load i8*, i8** [[TMP27]], align 8 -// CHECK4-NEXT: [[TMP30:%.*]] = bitcast i8* [[TMP29]] to void (i8*)* -// CHECK4-NEXT: call void [[TMP30]](i8* noundef [[TMP28]]) +// CHECK4-NEXT: [[TMP27:%.*]] = load i32*, i32** [[_TMP11]], align 8 +// CHECK4-NEXT: store i32* [[TMP27]], i32** [[BLOCK_CAPTURED20]], align 8 +// CHECK4-NEXT: [[TMP28:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]] to void ()* +// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP28]] to %struct.__block_literal_generic* +// CHECK4-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP30:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* +// CHECK4-NEXT: [[TMP31:%.*]] = load i8*, i8** [[TMP29]], align 8 +// CHECK4-NEXT: [[TMP32:%.*]] = bitcast i8* [[TMP31]] to void (i8*)* +// CHECK4-NEXT: call void [[TMP32]](i8* noundef [[TMP30]]) // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK4-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP33]], 1 // CHECK4-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]]) -// CHECK4-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK4-NEXT: br i1 [[TMP33]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]]) +// CHECK4-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK4-NEXT: br i1 [[TMP35]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK4: .omp.linear.pu: -// CHECK4-NEXT: [[TMP34:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK4-NEXT: store i32* [[TMP34]], i32** [[_TMP22]], align 8 -// CHECK4-NEXT: [[TMP35:%.*]] = load i32, i32* [[A7]], align 4 -// CHECK4-NEXT: [[TMP36:%.*]] = load i32*, i32** [[_TMP22]], align 8 -// CHECK4-NEXT: store i32 [[TMP35]], i32* [[TMP36]], align 4 -// CHECK4-NEXT: [[TMP37:%.*]] = load i32, i32* [[B9]], align 4 -// CHECK4-NEXT: store i32 [[TMP37]], i32* [[B]], align 4 -// CHECK4-NEXT: [[TMP38:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK4-NEXT: store i32* [[TMP38]], i32** [[_TMP23]], align 8 -// CHECK4-NEXT: [[TMP39:%.*]] = load i32, i32* [[C10]], align 4 -// CHECK4-NEXT: [[TMP40:%.*]] = load i32*, i32** [[_TMP23]], align 8 -// CHECK4-NEXT: store i32 [[TMP39]], i32* [[TMP40]], align 4 -// CHECK4-NEXT: [[TMP41:%.*]] = load i32, i32* [[B]], align 4 -// CHECK4-NEXT: [[B24:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP0]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP42:%.*]] = trunc i32 [[TMP41]] to i8 +// CHECK4-NEXT: [[TMP36:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK4-NEXT: store i32* [[TMP36]], i32** [[_TMP22]], align 8 +// CHECK4-NEXT: [[TMP37:%.*]] = load i32, i32* [[A7]], align 4 +// CHECK4-NEXT: [[TMP38:%.*]] = load i32*, i32** [[_TMP22]], align 8 +// CHECK4-NEXT: store i32 [[TMP37]], i32* [[TMP38]], align 4 +// CHECK4-NEXT: [[TMP39:%.*]] = load i32, i32* [[B9]], align 4 +// CHECK4-NEXT: store i32 [[TMP39]], i32* [[B]], align 4 +// CHECK4-NEXT: [[TMP40:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK4-NEXT: store i32* [[TMP40]], i32** [[_TMP23]], align 8 +// CHECK4-NEXT: [[TMP41:%.*]] = load i32, i32* [[C10]], align 4 +// CHECK4-NEXT: [[TMP42:%.*]] = load i32*, i32** [[_TMP23]], align 8 +// CHECK4-NEXT: store i32 [[TMP41]], i32* [[TMP42]], align 4 +// CHECK4-NEXT: [[TMP43:%.*]] = load i32, i32* [[B]], align 4 +// CHECK4-NEXT: [[B24:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP2]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP44:%.*]] = trunc i32 [[TMP43]] to i8 // CHECK4-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[B24]], align 4 -// CHECK4-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP42]], 15 +// CHECK4-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP44]], 15 // CHECK4-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -16 // CHECK4-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], [[BF_VALUE]] // CHECK4-NEXT: store i8 [[BF_SET]], i8* [[B24]], align 4 // CHECK4-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK4: .omp.linear.pu.done: -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) // CHECK4-NEXT: ret void // // @@ -1713,6 +1760,7 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>*, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK4-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* // CHECK4-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>** [[BLOCK_ADDR]], align 8 @@ -1732,24 +1780,29 @@ // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP4]], 1 // CHECK4-NEXT: store i32 [[DIV]], i32* [[TMP3]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[BLOCK_CAPTURE_ADDR3]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32*, i32** [[BLOCK_CAPTURE_ADDR3]], align 8 +// CHECK4-NEXT: store i32* [[TMP7]], i32** [[TMP6]], align 8 +// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR4:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 8 +// CHECK4-NEXT: store i32* [[BLOCK_CAPTURE_ADDR4]], i32** [[TMP8]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR5:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32*, i32** [[BLOCK_CAPTURE_ADDR5]], align 8 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.SS* [[THIS]], i32* [[TMP5]], i32* [[BLOCK_CAPTURE_ADDR4]], i32* [[TMP6]]) +// CHECK4-NEXT: [[TMP10:%.*]] = load i32*, i32** [[BLOCK_CAPTURE_ADDR5]], align 8 +// CHECK4-NEXT: store i32* [[TMP10]], i32** [[TMP9]], align 8 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR3]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 @@ -1764,128 +1817,130 @@ // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[A7:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[_TMP7:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[_TMP8:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[B9:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[C10:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[_TMP11:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[_TMP20:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[_TMP21:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[_TMP17:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[_TMP18:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK4-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK4-NEXT: store i32* [[B]], i32** [[B_ADDR]], align 8 -// CHECK4-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[B_ADDR]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32*, i32** [[C_ADDR]], align 8 -// CHECK4-NEXT: store i32* [[TMP1]], i32** [[TMP]], align 8 -// CHECK4-NEXT: store i32* [[TMP3]], i32** [[_TMP1]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK4-NEXT: store i32* [[TMP4]], i32** [[_TMP2]], align 8 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[_TMP1]], align 8 -// CHECK4-NEXT: store i32* [[TMP5]], i32** [[_TMP3]], align 8 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32*, i32** [[_TMP2]], align 8 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTLINEAR_START]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK4-NEXT: store i32 [[TMP8]], i32* [[DOTLINEAR_START5]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK4-NEXT: store i32 [[TMP10]], i32* [[DOTLINEAR_START6]], align 4 +// CHECK4-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 +// CHECK4-NEXT: store i32* [[TMP4]], i32** [[TMP]], align 8 +// CHECK4-NEXT: store i32* [[TMP8]], i32** [[_TMP1]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK4-NEXT: store i32* [[TMP9]], i32** [[_TMP2]], align 8 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP1]], align 8 +// CHECK4-NEXT: store i32* [[TMP10]], i32** [[_TMP3]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK4-NEXT: store i32 [[TMP12]], i32* [[DOTLINEAR_START]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK4-NEXT: store i32 [[TMP13]], i32* [[DOTLINEAR_START5]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK4-NEXT: store i32 [[TMP15]], i32* [[DOTLINEAR_START6]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) -// CHECK4-NEXT: store i32* [[A7]], i32** [[_TMP8]], align 8 -// CHECK4-NEXT: store i32* [[C10]], i32** [[_TMP11]], align 8 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK4-NEXT: store i32* [[A]], i32** [[_TMP7]], align 8 +// CHECK4-NEXT: store i32* [[C]], i32** [[_TMP8]], align 8 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK4-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK4-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP20]], 1 -// CHECK4-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP19]], [[MUL13]] -// CHECK4-NEXT: store i32 [[ADD14]], i32* [[A7]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTLINEAR_START5]], align 4 -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL15:%.*]] = mul nsw i32 [[TMP22]], 1 -// CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP21]], [[MUL15]] -// CHECK4-NEXT: store i32 [[ADD16]], i32* [[B9]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTLINEAR_START6]], align 4 -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP24]], 1 -// CHECK4-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], [[MUL17]] -// CHECK4-NEXT: store i32 [[ADD18]], i32* [[C10]], align 4 -// CHECK4-NEXT: [[TMP25:%.*]] = load i32*, i32** [[_TMP8]], align 8 -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP26]], 1 -// CHECK4-NEXT: store i32 [[INC]], i32* [[TMP25]], align 4 -// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[B9]], align 4 -// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP27]], -1 -// CHECK4-NEXT: store i32 [[DEC]], i32* [[B9]], align 4 -// CHECK4-NEXT: [[TMP28:%.*]] = load i32*, i32** [[_TMP11]], align 8 -// CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 -// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP29]], 1 -// CHECK4-NEXT: store i32 [[DIV]], i32* [[TMP28]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP25]], 1 +// CHECK4-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP24]], [[MUL10]] +// CHECK4-NEXT: store i32 [[ADD11]], i32* [[A]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTLINEAR_START5]], align 4 +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP27]], 1 +// CHECK4-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP26]], [[MUL12]] +// CHECK4-NEXT: store i32 [[ADD13]], i32* [[B]], align 4 +// CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTLINEAR_START6]], align 4 +// CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL14:%.*]] = mul nsw i32 [[TMP29]], 1 +// CHECK4-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP28]], [[MUL14]] +// CHECK4-NEXT: store i32 [[ADD15]], i32* [[C]], align 4 +// CHECK4-NEXT: [[TMP30:%.*]] = load i32*, i32** [[_TMP7]], align 8 +// CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK4-NEXT: store i32 [[INC]], i32* [[TMP30]], align 4 +// CHECK4-NEXT: [[TMP32:%.*]] = load i32, i32* [[B]], align 4 +// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP32]], -1 +// CHECK4-NEXT: store i32 [[DEC]], i32* [[B]], align 4 +// CHECK4-NEXT: [[TMP33:%.*]] = load i32*, i32** [[_TMP8]], align 8 +// CHECK4-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP34]], 1 +// CHECK4-NEXT: store i32 [[DIV]], i32* [[TMP33]], align 4 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP30]], 1 -// CHECK4-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK4-NEXT: store i32 [[ADD16]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]]) -// CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK4-NEXT: br i1 [[TMP32]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP17]]) +// CHECK4-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK4-NEXT: br i1 [[TMP37]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK4: .omp.linear.pu: -// CHECK4-NEXT: [[TMP33:%.*]] = load i32*, i32** [[_TMP2]], align 8 -// CHECK4-NEXT: store i32* [[TMP33]], i32** [[_TMP20]], align 8 -// CHECK4-NEXT: [[TMP34:%.*]] = load i32, i32* [[A7]], align 4 -// CHECK4-NEXT: [[TMP35:%.*]] = load i32*, i32** [[_TMP20]], align 8 -// CHECK4-NEXT: store i32 [[TMP34]], i32* [[TMP35]], align 4 -// CHECK4-NEXT: [[TMP36:%.*]] = load i32, i32* [[B9]], align 4 -// CHECK4-NEXT: store i32 [[TMP36]], i32* [[TMP2]], align 4 -// CHECK4-NEXT: [[TMP37:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK4-NEXT: store i32* [[TMP37]], i32** [[_TMP21]], align 8 -// CHECK4-NEXT: [[TMP38:%.*]] = load i32, i32* [[C10]], align 4 -// CHECK4-NEXT: [[C22:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 2 -// CHECK4-NEXT: [[TMP39:%.*]] = load i32*, i32** [[C22]], align 8 -// CHECK4-NEXT: store i32 [[TMP38]], i32* [[TMP39]], align 4 +// CHECK4-NEXT: [[TMP38:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK4-NEXT: store i32* [[TMP38]], i32** [[_TMP17]], align 8 +// CHECK4-NEXT: [[TMP39:%.*]] = load i32, i32* [[A]], align 4 +// CHECK4-NEXT: [[TMP40:%.*]] = load i32*, i32** [[_TMP17]], align 8 +// CHECK4-NEXT: store i32 [[TMP39]], i32* [[TMP40]], align 4 +// CHECK4-NEXT: [[TMP41:%.*]] = load i32, i32* [[B]], align 4 +// CHECK4-NEXT: store i32 [[TMP41]], i32* [[TMP6]], align 4 +// CHECK4-NEXT: [[TMP42:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK4-NEXT: store i32* [[TMP42]], i32** [[_TMP18]], align 8 +// CHECK4-NEXT: [[TMP43:%.*]] = load i32, i32* [[C]], align 4 +// CHECK4-NEXT: [[C19:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP44:%.*]] = load i32*, i32** [[C19]], align 8 +// CHECK4-NEXT: store i32 [[TMP43]], i32* [[TMP44]], align 4 // CHECK4-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK4: .omp.linear.pu.done: -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) // CHECK4-NEXT: ret void // diff --git a/clang/test/OpenMP/for_private_codegen.cpp b/clang/test/OpenMP/for_private_codegen.cpp --- a/clang/test/OpenMP/for_private_codegen.cpp +++ b/clang/test/OpenMP/for_private_codegen.cpp @@ -116,7 +116,9 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca %struct.S*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 @@ -127,8 +129,8 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00) // CHECK1-NEXT: store %struct.S* [[TEST]], %struct.S** [[VAR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK1-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4 // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 @@ -139,8 +141,8 @@ // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done1: +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done2: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK1-NEXT: ret i32 [[TMP2]] @@ -170,10 +172,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S*, align 8 @@ -190,6 +193,8 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store %struct.S* undef, %struct.S** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 @@ -207,75 +212,75 @@ // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP12]] to i64 // CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM4]] -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[ARRAYIDX5]] to i8* -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[TMP10]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[ARRAYIDX5]] to i8* +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[TMP11]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done8: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP19]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP20]]) // CHECK1-NEXT: ret void // // @@ -290,10 +295,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -304,50 +310,52 @@ // CHECK1-NEXT: [[I1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // @@ -355,33 +363,34 @@ // CHECK1-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) -// CHECK1-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK1-NEXT: store %struct.S.1* [[TEST]], %struct.S.1** [[VAR]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK1-NEXT: ret i32 [[TMP2]] // @@ -421,178 +430,181 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[_TMP2:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: -// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM4]] -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast %struct.S.0* [[ARRAYIDX5]] to i8* -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[TMP10]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 4, i1 false) +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast %struct.S.1* [[ARRAYIDX5]] to i8* +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast %struct.S.1* [[TMP11]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN7]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done8: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP19]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, i32* [[F]], align 4 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -607,10 +619,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca double*, align 8 @@ -627,67 +640,69 @@ // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store double* undef, double** [[_TMP1]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: store double* [[G1]], double** [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK3-NEXT: store double 1.000000e+00, double* [[G]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load double*, double** [[_TMP2]], align 8 -// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP8]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load double*, double** [[_TMP2]], align 8 +// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP9]], align 8 // CHECK3-NEXT: store i32 3, i32* [[SVAR]], align 4 // CHECK3-NEXT: store float 4.000000e+00, float* [[SFVAR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double* [[G]], double** [[TMP9]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP11:%.*]] = load double*, double** [[_TMP2]], align 8 -// CHECK3-NEXT: store double* [[TMP11]], double** [[TMP10]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store i32* [[SVAR]], i32** [[TMP12]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store float* [[SFVAR]], float** [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G]], double** [[TMP10]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load double*, double** [[_TMP2]], align 8 +// CHECK3-NEXT: store double* [[TMP12]], double** [[TMP11]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SVAR]], i32** [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[SFVAR]], float** [[TMP14]], align 8 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK3-NEXT: ret void // // @@ -707,18 +722,20 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>*, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* // CHECK4-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>** [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca double*, align 8 @@ -735,42 +752,44 @@ // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, double*, i32, float }>, align 8 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK4-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK4-NEXT: store double* undef, double** [[_TMP1]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK4-NEXT: store double* [[G1]], double** [[_TMP2]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK4-NEXT: store double 1.000000e+00, double* [[G]], align 8 -// CHECK4-NEXT: [[TMP8:%.*]] = load double*, double** [[_TMP2]], align 8 -// CHECK4-NEXT: store volatile double 1.000000e+00, double* [[TMP8]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = load double*, double** [[_TMP2]], align 8 +// CHECK4-NEXT: store volatile double 1.000000e+00, double* [[TMP9]], align 8 // CHECK4-NEXT: store i32 2, i32* [[SVAR]], align 4 // CHECK4-NEXT: store float 3.000000e+00, float* [[SFVAR]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, double*, i32, float }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, double*, i32, float }>* [[BLOCK]], i32 0, i32 0 @@ -784,37 +803,37 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, double*, i32, float }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, double*, i32, float }>* [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp.1 to %struct.__block_descriptor*), %struct.__block_descriptor** [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, double*, i32, float }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, double*, i32, float }>* [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP9:%.*]] = load volatile double, double* [[G]], align 8 -// CHECK4-NEXT: store volatile double [[TMP9]], double* [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[TMP10:%.*]] = load volatile double, double* [[G]], align 8 +// CHECK4-NEXT: store volatile double [[TMP10]], double* [[BLOCK_CAPTURED]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED4:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, double*, i32, float }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, double*, i32, float }>* [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP10:%.*]] = load double*, double** [[_TMP2]], align 8 -// CHECK4-NEXT: store double* [[TMP10]], double** [[BLOCK_CAPTURED4]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = load double*, double** [[_TMP2]], align 8 +// CHECK4-NEXT: store double* [[TMP11]], double** [[BLOCK_CAPTURED4]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED5:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, double*, i32, float }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, double*, i32, float }>* [[BLOCK]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[SVAR]], align 4 -// CHECK4-NEXT: store i32 [[TMP11]], i32* [[BLOCK_CAPTURED5]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP12]], i32* [[BLOCK_CAPTURED5]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED6:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, double*, i32, float }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, double*, i32, float }>* [[BLOCK]], i32 0, i32 8 -// CHECK4-NEXT: [[TMP12:%.*]] = load float, float* [[SFVAR]], align 4 -// CHECK4-NEXT: store float [[TMP12]], float* [[BLOCK_CAPTURED6]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, double*, i32, float }>* [[BLOCK]] to void ()* -// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP13]] to %struct.__block_literal_generic* -// CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP15:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* -// CHECK4-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP14]], align 8 -// CHECK4-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to void (i8*)* -// CHECK4-NEXT: call void [[TMP17]](i8* noundef [[TMP15]]) +// CHECK4-NEXT: [[TMP13:%.*]] = load float, float* [[SFVAR]], align 4 +// CHECK4-NEXT: store float [[TMP13]], float* [[BLOCK_CAPTURED6]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, double*, i32, float }>* [[BLOCK]] to void ()* +// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP14]] to %struct.__block_literal_generic* +// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP16:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* +// CHECK4-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP15]], align 8 +// CHECK4-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to void (i8*)* +// CHECK4-NEXT: call void [[TMP18]](i8* noundef [[TMP16]]) // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK4-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK4-NEXT: ret void // // diff --git a/clang/test/OpenMP/for_reduction_codegen.cpp b/clang/test/OpenMP/for_reduction_codegen.cpp --- a/clang/test/OpenMP/for_reduction_codegen.cpp +++ b/clang/test/OpenMP/for_reduction_codegen.cpp @@ -542,8 +542,21 @@ // CHECK1-NEXT: [[VAR2:%.*]] = alloca %struct.S**, align 8 // CHECK1-NEXT: [[VVAR2:%.*]] = alloca [5 x %struct.S], align 16 // CHECK1-NEXT: [[VAR3:%.*]] = alloca [4 x %struct.S]*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_10:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_11:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_12:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_13:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_14:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_15:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_16:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_17:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_18:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_19:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_20:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_21:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store float 0.000000e+00, float* [[T_VAR]], align 4 @@ -582,70 +595,124 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE8]], label [[ARRAYCTOR_CONT9:%.*]], label [[ARRAYCTOR_LOOP5]] // CHECK1: arrayctor.cont9: // CHECK1-NEXT: store [4 x %struct.S]* [[S_ARR]], [4 x %struct.S]** [[VAR3]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S*, %struct.S** [[VAR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, %struct.S*, %struct.S*, float*, [2 x i32]*, [4 x %struct.S]*)* @.omp_outlined. to void (i32*, i32*, ...)*), float* [[T_VAR]], %struct.S* [[TMP1]], %struct.S* [[VAR1]], float* [[T_VAR1]], [2 x i32]* [[VEC]], [4 x %struct.S]* [[S_ARR]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store float* [[T_VAR]], float** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR]], align 8 +// CHECK1-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.S* [[VAR1]], %struct.S** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[T_VAR1]], float** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store [4 x %struct.S]* [[S_ARR]], [4 x %struct.S]** [[TMP7]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 -// CHECK1-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave() -// CHECK1-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = mul nuw i64 10, [[TMP3]] -// CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP5]], align 16 -// CHECK1-NEXT: store i64 [[TMP3]], i64* [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, [2 x i32]*, [10 x [4 x %struct.S]]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 10, i64 [[TMP3]], i32* [[VLA]], [2 x i32]* [[VEC]], [10 x [4 x %struct.S]]* [[ARRS]]) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, [10 x [4 x %struct.S]]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 10, i64 [[TMP3]], i32* [[VLA]], [10 x [4 x %struct.S]]* [[ARRS]]) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 10, i64 [[TMP3]], i32* [[VLA]]) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S***)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.S*** [[VAR2]]) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S***)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.S*** [[VAR2]]) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S***)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.S*** [[VAR2]]) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S***)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.S*** [[VAR2]]) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [5 x %struct.S]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), [5 x %struct.S]* [[VVAR2]]) -// CHECK1-NEXT: [[TMP6:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[VAR3]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [4 x %struct.S]*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), [4 x %struct.S]* [[TMP6]]) -// CHECK1-NEXT: [[TMP7:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[VAR3]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [4 x %struct.S]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), [4 x %struct.S]* [[TMP7]]) -// CHECK1-NEXT: [[TMP8:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[VAR3]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [4 x %struct.S]*)* @.omp_outlined..21 to void (i32*, i32*, ...)*), [4 x %struct.S]* [[TMP8]]) -// CHECK1-NEXT: [[TMP9:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[VAR3]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [4 x %struct.S]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), [4 x %struct.S]* [[TMP9]]) -// CHECK1-NEXT: [[CALL10:%.*]] = call noundef i32 @_Z5tmainIiLi42EET_v() -// CHECK1-NEXT: store i32 [[CALL10]], i32* [[RETVAL]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP10]]) -// CHECK1-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [5 x %struct.S], [5 x %struct.S]* [[VVAR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN11]], i64 5 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave() +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = mul nuw i64 10, [[TMP9]] +// CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP11]], align 16 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_10]], i32 0, i32 0 +// CHECK1-NEXT: store i64 10, i64* [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_10]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_10]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[VLA]], i32** [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_10]], i32 0, i32 3 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_10]], i32 0, i32 4 +// CHECK1-NEXT: store [10 x [4 x %struct.S]]* [[ARRS]], [10 x [4 x %struct.S]]** [[TMP16]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_10]]) +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 0 +// CHECK1-NEXT: store i64 10, i64* [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[VLA]], i32** [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 3 +// CHECK1-NEXT: store [10 x [4 x %struct.S]]* [[ARRS]], [10 x [4 x %struct.S]]** [[TMP20]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_11]]) +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_12]], i32 0, i32 0 +// CHECK1-NEXT: store i64 10, i64* [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_12]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_12]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[VLA]], i32** [[TMP23]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_12]]) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_13]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.S*** [[VAR2]], %struct.S**** [[TMP24]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_13]]) +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_14]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.S*** [[VAR2]], %struct.S**** [[TMP25]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_14]]) +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_15]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.S*** [[VAR2]], %struct.S**** [[TMP26]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_15]]) +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_16]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.S*** [[VAR2]], %struct.S**** [[TMP27]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_16]]) +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_17]], i32 0, i32 0 +// CHECK1-NEXT: store [5 x %struct.S]* [[VVAR2]], [5 x %struct.S]** [[TMP28]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_17]]) +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP30:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[VAR3]], align 8 +// CHECK1-NEXT: store [4 x %struct.S]* [[TMP30]], [4 x %struct.S]** [[TMP29]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_18]]) +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_19]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP32:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[VAR3]], align 8 +// CHECK1-NEXT: store [4 x %struct.S]* [[TMP32]], [4 x %struct.S]** [[TMP31]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_19]]) +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_20]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP34:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[VAR3]], align 8 +// CHECK1-NEXT: store [4 x %struct.S]* [[TMP34]], [4 x %struct.S]** [[TMP33]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..21 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_20]]) +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_21]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP36:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[VAR3]], align 8 +// CHECK1-NEXT: store [4 x %struct.S]* [[TMP36]], [4 x %struct.S]** [[TMP35]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.11*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_21]]) +// CHECK1-NEXT: [[CALL22:%.*]] = call noundef i32 @_Z5tmainIiLi42EET_v() +// CHECK1-NEXT: store i32 [[CALL22]], i32* [[RETVAL]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP37]]) +// CHECK1-NEXT: [[ARRAY_BEGIN23:%.*]] = getelementptr inbounds [5 x %struct.S], [5 x %struct.S]* [[VVAR2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN23]], i64 5 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP11]], [[ARRAYCTOR_CONT9]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP38]], [[ARRAYCTOR_CONT9]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5:[0-9]+]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done12: -// CHECK1-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [10 x [4 x %struct.S]], [10 x [4 x %struct.S]]* [[ARRS]], i32 0, i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN13]], i64 40 -// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY14:%.*]] -// CHECK1: arraydestroy.body14: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST15:%.*]] = phi %struct.S* [ [[TMP12]], [[ARRAYDESTROY_DONE12]] ], [ [[ARRAYDESTROY_ELEMENT16:%.*]], [[ARRAYDESTROY_BODY14]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT16]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST15]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT16]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE17:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT16]], [[ARRAY_BEGIN13]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE17]], label [[ARRAYDESTROY_DONE18:%.*]], label [[ARRAYDESTROY_BODY14]] -// CHECK1: arraydestroy.done18: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAY_BEGIN19:%.*]] = getelementptr inbounds [4 x %struct.S], [4 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN19]], i64 4 -// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY20:%.*]] -// CHECK1: arraydestroy.body20: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST21:%.*]] = phi %struct.S* [ [[TMP13]], [[ARRAYDESTROY_DONE18]] ], [ [[ARRAYDESTROY_ELEMENT22:%.*]], [[ARRAYDESTROY_BODY20]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT22]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST21]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT22]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE23:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT22]], [[ARRAY_BEGIN19]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE23]], label [[ARRAYDESTROY_DONE24:%.*]], label [[ARRAYDESTROY_BODY20]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN23]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE24:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done24: +// CHECK1-NEXT: [[ARRAY_BEGIN25:%.*]] = getelementptr inbounds [10 x [4 x %struct.S]], [10 x [4 x %struct.S]]* [[ARRS]], i32 0, i32 0, i32 0 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN25]], i64 40 +// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY26:%.*]] +// CHECK1: arraydestroy.body26: +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST27:%.*]] = phi %struct.S* [ [[TMP39]], [[ARRAYDESTROY_DONE24]] ], [ [[ARRAYDESTROY_ELEMENT28:%.*]], [[ARRAYDESTROY_BODY26]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT28]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST27]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT28]]) #[[ATTR5]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE29:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT28]], [[ARRAY_BEGIN25]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE29]], label [[ARRAYDESTROY_DONE30:%.*]], label [[ARRAYDESTROY_BODY26]] +// CHECK1: arraydestroy.done30: +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR5]] +// CHECK1-NEXT: [[ARRAY_BEGIN31:%.*]] = getelementptr inbounds [4 x %struct.S], [4 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN31]], i64 4 +// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY32:%.*]] +// CHECK1: arraydestroy.body32: +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST33:%.*]] = phi %struct.S* [ [[TMP40]], [[ARRAYDESTROY_DONE30]] ], [ [[ARRAYDESTROY_ELEMENT34:%.*]], [[ARRAYDESTROY_BODY32]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT34]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST33]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT34]]) #[[ATTR5]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE35:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT34]], [[ARRAY_BEGIN31]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE35]], label [[ARRAYDESTROY_DONE36:%.*]], label [[ARRAYDESTROY_BODY32]] +// CHECK1: arraydestroy.done36: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP14]] +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP41]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ -672,16 +739,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR1:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[T_VAR1:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [4 x %struct.S]* noundef nonnull align 4 dereferenceable(16) [[S_ARR:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca float*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK1-NEXT: [[VAR1_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK1-NEXT: [[T_VAR1_ADDR:%.*]] = alloca float*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [4 x %struct.S]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -690,216 +752,218 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR3:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca %struct.S*, align 8 -// CHECK1-NEXT: [[VAR16:%.*]] = alloca [[STRUCT_S]], align 4 -// CHECK1-NEXT: [[T_VAR17:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca %struct.S*, align 8 +// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x i8*], align 8 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 4 -// CHECK1-NEXT: [[REF_TMP23:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[REF_TMP19:%.*]] = alloca [[STRUCT_S]], align 4 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[_TMP31:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[_TMP27:%.*]] = alloca float, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store float* [[T_VAR]], float** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S* [[VAR1]], %struct.S** [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: store float* [[T_VAR1]], float** [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store [4 x %struct.S]* [[S_ARR]], [4 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load float*, float** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float*, float** [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S* [[TMP1]], %struct.S** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK1-NEXT: store %struct.S* [[TMP6]], %struct.S** [[_TMP1]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load float*, float** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load %struct.S*, %struct.S** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.S*, %struct.S** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[TMP11]], align 8 +// CHECK1-NEXT: store %struct.S* [[TMP4]], %struct.S** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.S* [[TMP13]], %struct.S** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store float 0.000000e+00, float* [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 8 -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: store %struct.S* [[VAR4]], %struct.S** [[_TMP5]], align 8 -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR16]]) -// CHECK1-NEXT: store float 0x47EFFFFFE0000000, float* [[T_VAR17]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK1-NEXT: store float 0.000000e+00, float* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 8 +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP3]], align 8 +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: store float 0x47EFFFFFE0000000, float* [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load float, float* [[T_VAR3]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = fptosi float [[TMP16]] to i32 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP4]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP23:%.*]] = load float, float* [[T_VAR]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = fptosi float [[TMP23]] to i32 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP10]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 [[CONV]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load %struct.S*, %struct.S** [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [4 x %struct.S], [4 x %struct.S]* [[TMP5]], i64 0, i64 [[IDXPROM9]] -// CHECK1-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX10]] to i8* -// CHECK1-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[TMP18]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP25:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x %struct.S], [4 x %struct.S]* [[TMP12]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: [[TMP27:%.*]] = bitcast %struct.S* [[ARRAYIDX6]] to i8* +// CHECK1-NEXT: [[TMP28:%.*]] = bitcast %struct.S* [[TMP25]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP27]], i8* align 4 [[TMP28]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]]) -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP24:%.*]] = bitcast float* [[T_VAR3]] to i8* -// CHECK1-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP26:%.*]] = bitcast %struct.S* [[VAR4]] to i8* -// CHECK1-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP28:%.*]] = bitcast %struct.S* [[VAR16]] to i8* -// CHECK1-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP30:%.*]] = bitcast float* [[T_VAR17]] to i8* -// CHECK1-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = bitcast [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 4, i64 32, i8* [[TMP31]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP32]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP31:%.*]] = bitcast float* [[T_VAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP33]], i8** [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[VAR1]] to i8* +// CHECK1-NEXT: store i8* [[TMP35]], i8** [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 +// CHECK1-NEXT: [[TMP37:%.*]] = bitcast float* [[T_VAR1]] to i8* +// CHECK1-NEXT: store i8* [[TMP37]], i8** [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = bitcast [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 4, i64 32, i8* [[TMP38]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP39]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP33:%.*]] = load float, float* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = load float, float* [[T_VAR3]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = fadd float [[TMP33]], [[TMP34]] -// CHECK1-NEXT: store float [[ADD12]], float* [[TMP0]], align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[TMP7]] to i8* -// CHECK1-NEXT: [[TMP36:%.*]] = bitcast %struct.S* [[CALL]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i64 4, i1 false) -// CHECK1-NEXT: [[CALL13:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[CALL13]], 0.000000e+00 +// CHECK1-NEXT: [[TMP40:%.*]] = load float, float* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load float, float* [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = fadd float [[TMP40]], [[TMP41]] +// CHECK1-NEXT: store float [[ADD8]], float* [[TMP2]], align 4 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP14]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: [[TMP42:%.*]] = bitcast %struct.S* [[TMP14]] to i8* +// CHECK1-NEXT: [[TMP43:%.*]] = bitcast %struct.S* [[CALL]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP42]], i8* align 4 [[TMP43]], i64 4, i1 false) +// CHECK1-NEXT: [[CALL9:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP6]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[CALL9]], 0.000000e+00 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] // CHECK1: land.rhs: -// CHECK1-NEXT: [[CALL14:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR16]]) -// CHECK1-NEXT: [[TOBOOL15:%.*]] = fcmp une float [[CALL14]], 0.000000e+00 +// CHECK1-NEXT: [[CALL10:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL11:%.*]] = fcmp une float [[CALL10]], 0.000000e+00 // CHECK1-NEXT: br label [[LAND_END]] // CHECK1: land.end: -// CHECK1-NEXT: [[TMP37:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL15]], [[LAND_RHS]] ] -// CHECK1-NEXT: [[CONV16:%.*]] = uitofp i1 [[TMP37]] to float -// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], float noundef [[CONV16]]) -// CHECK1-NEXT: [[TMP38:%.*]] = bitcast %struct.S* [[TMP2]] to i8* -// CHECK1-NEXT: [[TMP39:%.*]] = bitcast %struct.S* [[REF_TMP]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP44:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL11]], [[LAND_RHS]] ] +// CHECK1-NEXT: [[CONV12:%.*]] = uitofp i1 [[TMP44]] to float +// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], float noundef [[CONV12]]) +// CHECK1-NEXT: [[TMP45:%.*]] = bitcast %struct.S* [[TMP6]] to i8* +// CHECK1-NEXT: [[TMP46:%.*]] = bitcast %struct.S* [[REF_TMP]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP45]], i8* align 4 [[TMP46]], i64 4, i1 false) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP40:%.*]] = load float, float* [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP41:%.*]] = load float, float* [[T_VAR17]], align 4 -// CHECK1-NEXT: [[CMP17:%.*]] = fcmp olt float [[TMP40]], [[TMP41]] -// CHECK1-NEXT: br i1 [[CMP17]], label [[COND_TRUE18:%.*]], label [[COND_FALSE19:%.*]] -// CHECK1: cond.true18: -// CHECK1-NEXT: [[TMP42:%.*]] = load float, float* [[TMP3]], align 4 -// CHECK1-NEXT: br label [[COND_END20:%.*]] -// CHECK1: cond.false19: -// CHECK1-NEXT: [[TMP43:%.*]] = load float, float* [[T_VAR17]], align 4 -// CHECK1-NEXT: br label [[COND_END20]] -// CHECK1: cond.end20: -// CHECK1-NEXT: [[COND21:%.*]] = phi float [ [[TMP42]], [[COND_TRUE18]] ], [ [[TMP43]], [[COND_FALSE19]] ] -// CHECK1-NEXT: store float [[COND21]], float* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP47:%.*]] = load float, float* [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = load float, float* [[T_VAR1]], align 4 +// CHECK1-NEXT: [[CMP13:%.*]] = fcmp olt float [[TMP47]], [[TMP48]] +// CHECK1-NEXT: br i1 [[CMP13]], label [[COND_TRUE14:%.*]], label [[COND_FALSE15:%.*]] +// CHECK1: cond.true14: +// CHECK1-NEXT: [[TMP49:%.*]] = load float, float* [[TMP8]], align 4 +// CHECK1-NEXT: br label [[COND_END16:%.*]] +// CHECK1: cond.false15: +// CHECK1-NEXT: [[TMP50:%.*]] = load float, float* [[T_VAR1]], align 4 +// CHECK1-NEXT: br label [[COND_END16]] +// CHECK1: cond.end16: +// CHECK1-NEXT: [[COND17:%.*]] = phi float [ [[TMP49]], [[COND_TRUE14]] ], [ [[TMP50]], [[COND_FALSE15]] ] +// CHECK1-NEXT: store float [[COND17]], float* [[TMP8]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP16]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP44:%.*]] = load float, float* [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TMP45:%.*]] = atomicrmw fadd float* [[TMP0]], float [[TMP44]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL22:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: [[TMP46:%.*]] = bitcast %struct.S* [[TMP7]] to i8* -// CHECK1-NEXT: [[TMP47:%.*]] = bitcast %struct.S* [[CALL22]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP46]], i8* align 4 [[TMP47]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL24:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]]) -// CHECK1-NEXT: [[TOBOOL25:%.*]] = fcmp une float [[CALL24]], 0.000000e+00 -// CHECK1-NEXT: br i1 [[TOBOOL25]], label [[LAND_RHS26:%.*]], label [[LAND_END29:%.*]] -// CHECK1: land.rhs26: -// CHECK1-NEXT: [[CALL27:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR16]]) -// CHECK1-NEXT: [[TOBOOL28:%.*]] = fcmp une float [[CALL27]], 0.000000e+00 -// CHECK1-NEXT: br label [[LAND_END29]] -// CHECK1: land.end29: -// CHECK1-NEXT: [[TMP48:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL28]], [[LAND_RHS26]] ] -// CHECK1-NEXT: [[CONV30:%.*]] = uitofp i1 [[TMP48]] to float -// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP23]], float noundef [[CONV30]]) -// CHECK1-NEXT: [[TMP49:%.*]] = bitcast %struct.S* [[TMP2]] to i8* -// CHECK1-NEXT: [[TMP50:%.*]] = bitcast %struct.S* [[REF_TMP23]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP49]], i8* align 4 [[TMP50]], i64 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP23]]) #[[ATTR5]] -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[TMP51:%.*]] = load float, float* [[T_VAR17]], align 4 -// CHECK1-NEXT: [[TMP52:%.*]] = bitcast float* [[TMP3]] to i32* -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP52]] monotonic, align 4 +// CHECK1-NEXT: [[TMP51:%.*]] = load float, float* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP52:%.*]] = atomicrmw fadd float* [[TMP2]], float [[TMP51]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP16]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL18:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP14]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: [[TMP53:%.*]] = bitcast %struct.S* [[TMP14]] to i8* +// CHECK1-NEXT: [[TMP54:%.*]] = bitcast %struct.S* [[CALL18]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP53]], i8* align 4 [[TMP54]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP16]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP16]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL20:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP6]]) +// CHECK1-NEXT: [[TOBOOL21:%.*]] = fcmp une float [[CALL20]], 0.000000e+00 +// CHECK1-NEXT: br i1 [[TOBOOL21]], label [[LAND_RHS22:%.*]], label [[LAND_END25:%.*]] +// CHECK1: land.rhs22: +// CHECK1-NEXT: [[CALL23:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL24:%.*]] = fcmp une float [[CALL23]], 0.000000e+00 +// CHECK1-NEXT: br label [[LAND_END25]] +// CHECK1: land.end25: +// CHECK1-NEXT: [[TMP55:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL24]], [[LAND_RHS22]] ] +// CHECK1-NEXT: [[CONV26:%.*]] = uitofp i1 [[TMP55]] to float +// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP19]], float noundef [[CONV26]]) +// CHECK1-NEXT: [[TMP56:%.*]] = bitcast %struct.S* [[TMP6]] to i8* +// CHECK1-NEXT: [[TMP57:%.*]] = bitcast %struct.S* [[REF_TMP19]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP56]], i8* align 4 [[TMP57]], i64 4, i1 false) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP19]]) #[[ATTR5]] +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP16]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP58:%.*]] = load float, float* [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP59:%.*]] = bitcast float* [[TMP8]] to i32* +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP59]] monotonic, align 4 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP53:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END29]] ], [ [[TMP63:%.*]], [[COND_END35:%.*]] ] -// CHECK1-NEXT: [[TMP54:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32* -// CHECK1-NEXT: [[TMP55:%.*]] = bitcast i32 [[TMP53]] to float -// CHECK1-NEXT: store float [[TMP55]], float* [[_TMP31]], align 4 -// CHECK1-NEXT: [[TMP56:%.*]] = load float, float* [[_TMP31]], align 4 -// CHECK1-NEXT: [[TMP57:%.*]] = load float, float* [[T_VAR17]], align 4 -// CHECK1-NEXT: [[CMP32:%.*]] = fcmp olt float [[TMP56]], [[TMP57]] -// CHECK1-NEXT: br i1 [[CMP32]], label [[COND_TRUE33:%.*]], label [[COND_FALSE34:%.*]] -// CHECK1: cond.true33: -// CHECK1-NEXT: [[TMP58:%.*]] = load float, float* [[_TMP31]], align 4 -// CHECK1-NEXT: br label [[COND_END35]] -// CHECK1: cond.false34: -// CHECK1-NEXT: [[TMP59:%.*]] = load float, float* [[T_VAR17]], align 4 -// CHECK1-NEXT: br label [[COND_END35]] -// CHECK1: cond.end35: -// CHECK1-NEXT: [[COND36:%.*]] = phi float [ [[TMP58]], [[COND_TRUE33]] ], [ [[TMP59]], [[COND_FALSE34]] ] -// CHECK1-NEXT: store float [[COND36]], float* [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[TMP60:%.*]] = load i32, i32* [[TMP54]], align 4 -// CHECK1-NEXT: [[TMP61:%.*]] = bitcast float* [[TMP3]] to i32* -// CHECK1-NEXT: [[TMP62:%.*]] = cmpxchg i32* [[TMP61]], i32 [[TMP53]], i32 [[TMP60]] monotonic monotonic, align 4 -// CHECK1-NEXT: [[TMP63]] = extractvalue { i32, i1 } [[TMP62]], 0 -// CHECK1-NEXT: [[TMP64:%.*]] = extractvalue { i32, i1 } [[TMP62]], 1 -// CHECK1-NEXT: br i1 [[TMP64]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP60:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END25]] ], [ [[TMP70:%.*]], [[COND_END31:%.*]] ] +// CHECK1-NEXT: [[TMP61:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32* +// CHECK1-NEXT: [[TMP62:%.*]] = bitcast i32 [[TMP60]] to float +// CHECK1-NEXT: store float [[TMP62]], float* [[_TMP27]], align 4 +// CHECK1-NEXT: [[TMP63:%.*]] = load float, float* [[_TMP27]], align 4 +// CHECK1-NEXT: [[TMP64:%.*]] = load float, float* [[T_VAR1]], align 4 +// CHECK1-NEXT: [[CMP28:%.*]] = fcmp olt float [[TMP63]], [[TMP64]] +// CHECK1-NEXT: br i1 [[CMP28]], label [[COND_TRUE29:%.*]], label [[COND_FALSE30:%.*]] +// CHECK1: cond.true29: +// CHECK1-NEXT: [[TMP65:%.*]] = load float, float* [[_TMP27]], align 4 +// CHECK1-NEXT: br label [[COND_END31]] +// CHECK1: cond.false30: +// CHECK1-NEXT: [[TMP66:%.*]] = load float, float* [[T_VAR1]], align 4 +// CHECK1-NEXT: br label [[COND_END31]] +// CHECK1: cond.end31: +// CHECK1-NEXT: [[COND32:%.*]] = phi float [ [[TMP65]], [[COND_TRUE29]] ], [ [[TMP66]], [[COND_FALSE30]] ] +// CHECK1-NEXT: store float [[COND32]], float* [[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[TMP67:%.*]] = load i32, i32* [[TMP61]], align 4 +// CHECK1-NEXT: [[TMP68:%.*]] = bitcast float* [[TMP8]] to i32* +// CHECK1-NEXT: [[TMP69:%.*]] = cmpxchg i32* [[TMP68]], i32 [[TMP60]], i32 [[TMP67]] monotonic monotonic, align 4 +// CHECK1-NEXT: [[TMP70]] = extractvalue { i32, i1 } [[TMP69]], 0 +// CHECK1-NEXT: [[TMP71:%.*]] = extractvalue { i32, i1 } [[TMP69]], 1 +// CHECK1-NEXT: br i1 [[TMP71]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP16]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR16]]) #[[ATTR5]] -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP9]]) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR5]] +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP16]]) // CHECK1-NEXT: ret void // // @@ -1009,15 +1073,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [10 x [4 x %struct.S]]* noundef nonnull align 4 dereferenceable(160) [[ARRS:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[ARRS_ADDR:%.*]] = alloca [10 x [4 x %struct.S]]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1031,241 +1091,243 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store i32* [[ARR]], i32** [[ARR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store [10 x [4 x %struct.S]]* [[ARRS]], [10 x [4 x %struct.S]]** [[ARRS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load [10 x [4 x %struct.S]]*, [10 x [4 x %struct.S]]** [[ARRS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load [10 x [4 x %struct.S]]*, [10 x [4 x %struct.S]]** [[TMP9]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = mul nsw i64 1, [[TMP1]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[TMP5]] -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i64 0 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX4]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP7]] -// CHECK1-NEXT: [[TMP8:%.*]] = mul nsw i64 1, [[TMP1]] -// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[TMP8]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX5]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint i32* [[ARRAYIDX6]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint i32* [[ARRAYIDX3]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: [[TMP12:%.*]] = sdiv exact i64 [[TMP11]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = add nuw i64 [[TMP12]], 1 -// CHECK1-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP15:%.*]] = call i8* @llvm.stacksave() -// CHECK1-NEXT: store i8* [[TMP15]], i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA7:%.*]] = alloca i32, i64 [[TMP13]], align 16 -// CHECK1-NEXT: store i64 [[TMP13]], i64* [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr i32, i32* [[VLA7]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[VLA7]], [[TMP16]] +// CHECK1-NEXT: [[TMP11:%.*]] = mul nsw i64 1, [[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i64 [[TMP11]] +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP8]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP13]] +// CHECK1-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i64 [[TMP14]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX3]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP15:%.*]] = ptrtoint i32* [[ARRAYIDX4]] to i64 +// CHECK1-NEXT: [[TMP16:%.*]] = ptrtoint i32* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP17:%.*]] = sub i64 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP18:%.*]] = sdiv exact i64 [[TMP17]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP19:%.*]] = add nuw i64 [[TMP18]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = mul nuw i64 [[TMP19]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP21:%.*]] = call i8* @llvm.stacksave() +// CHECK1-NEXT: store i8* [[TMP21]], i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP19]], align 16 +// CHECK1-NEXT: store i64 [[TMP19]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr i32, i32* [[VLA]], i64 [[TMP19]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[VLA]], [[TMP22]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i32* [ [[VLA7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i32* [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i32 0, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP16]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP22]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint i32* [[TMP2]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint i32* [[ARRAYIDX3]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[VLA7]], i64 [[TMP20]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [4 x %struct.S]], [10 x [4 x %struct.S]]* [[TMP4]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x %struct.S], [4 x %struct.S]* [[ARRAYIDX8]], i64 0, i64 0 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDECAY]], i64 1 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX10]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN11:%.*]] = add nsw i64 0, [[TMP23]] -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x [4 x %struct.S]], [10 x [4 x %struct.S]]* [[TMP4]], i64 0, i64 [[LB_ADD_LEN11]] -// CHECK1-NEXT: [[ARRAYDECAY13:%.*]] = getelementptr inbounds [4 x %struct.S], [4 x %struct.S]* [[ARRAYIDX12]], i64 0, i64 0 -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDECAY13]], i64 2 -// CHECK1-NEXT: [[TMP24:%.*]] = ptrtoint %struct.S* [[ARRAYIDX14]] to i64 -// CHECK1-NEXT: [[TMP25:%.*]] = ptrtoint %struct.S* [[ARRAYIDX9]] to i64 -// CHECK1-NEXT: [[TMP26:%.*]] = sub i64 [[TMP24]], [[TMP25]] -// CHECK1-NEXT: [[TMP27:%.*]] = sdiv exact i64 [[TMP26]], ptrtoint (%struct.S* getelementptr ([[STRUCT_S]], %struct.S* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP28:%.*]] = add nuw i64 [[TMP27]], 1 -// CHECK1-NEXT: [[TMP29:%.*]] = mul nuw i64 [[TMP28]], ptrtoint (%struct.S* getelementptr ([[STRUCT_S]], %struct.S* null, i32 1) to i64) -// CHECK1-NEXT: [[VLA15:%.*]] = alloca [[STRUCT_S]], i64 [[TMP28]], align 16 -// CHECK1-NEXT: store i64 [[TMP28]], i64* [[__VLA_EXPR1]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[VLA15]], i64 [[TMP28]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY16:%.*]] = icmp eq %struct.S* [[VLA15]], [[TMP30]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY16]], label [[OMP_ARRAYINIT_DONE21:%.*]], label [[OMP_ARRAYINIT_BODY17:%.*]] -// CHECK1: omp.arrayinit.body17: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST18:%.*]] = phi %struct.S* [ [[VLA15]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYINIT_BODY17]] ] -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST18]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST18]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP30]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYINIT_DONE21]], label [[OMP_ARRAYINIT_BODY17]] -// CHECK1: omp.arrayinit.done21: -// CHECK1-NEXT: [[TMP31:%.*]] = bitcast [10 x [4 x %struct.S]]* [[TMP4]] to %struct.S* -// CHECK1-NEXT: [[TMP32:%.*]] = ptrtoint %struct.S* [[TMP31]] to i64 -// CHECK1-NEXT: [[TMP33:%.*]] = ptrtoint %struct.S* [[ARRAYIDX9]] to i64 -// CHECK1-NEXT: [[TMP34:%.*]] = sub i64 [[TMP32]], [[TMP33]] -// CHECK1-NEXT: [[TMP35:%.*]] = sdiv exact i64 [[TMP34]], ptrtoint (%struct.S* getelementptr ([[STRUCT_S]], %struct.S* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[VLA15]], i64 [[TMP35]] -// CHECK1-NEXT: [[TMP37:%.*]] = bitcast %struct.S* [[TMP36]] to [10 x [4 x %struct.S]]* -// CHECK1-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP40]], 9 +// CHECK1-NEXT: [[TMP23:%.*]] = ptrtoint i32* [[TMP6]] to i64 +// CHECK1-NEXT: [[TMP24:%.*]] = ptrtoint i32* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP25:%.*]] = sub i64 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: [[TMP26:%.*]] = sdiv exact i64 [[TMP25]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr i32, i32* [[VLA]], i64 [[TMP26]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x [4 x %struct.S]], [10 x [4 x %struct.S]]* [[TMP10]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x %struct.S], [4 x %struct.S]* [[ARRAYIDX5]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAYDECAY]], i64 1 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP8]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN8:%.*]] = add nsw i64 0, [[TMP29]] +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [4 x %struct.S]], [10 x [4 x %struct.S]]* [[TMP10]], i64 0, i64 [[LB_ADD_LEN8]] +// CHECK1-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [4 x %struct.S], [4 x %struct.S]* [[ARRAYIDX9]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDECAY10]], i64 2 +// CHECK1-NEXT: [[TMP30:%.*]] = ptrtoint %struct.S* [[ARRAYIDX11]] to i64 +// CHECK1-NEXT: [[TMP31:%.*]] = ptrtoint %struct.S* [[ARRAYIDX6]] to i64 +// CHECK1-NEXT: [[TMP32:%.*]] = sub i64 [[TMP30]], [[TMP31]] +// CHECK1-NEXT: [[TMP33:%.*]] = sdiv exact i64 [[TMP32]], ptrtoint (%struct.S* getelementptr ([[STRUCT_S]], %struct.S* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP34:%.*]] = add nuw i64 [[TMP33]], 1 +// CHECK1-NEXT: [[TMP35:%.*]] = mul nuw i64 [[TMP34]], ptrtoint (%struct.S* getelementptr ([[STRUCT_S]], %struct.S* null, i32 1) to i64) +// CHECK1-NEXT: [[VLA12:%.*]] = alloca [[STRUCT_S]], i64 [[TMP34]], align 16 +// CHECK1-NEXT: store i64 [[TMP34]], i64* [[__VLA_EXPR1]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[VLA12]], i64 [[TMP34]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY13:%.*]] = icmp eq %struct.S* [[VLA12]], [[TMP36]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY13]], label [[OMP_ARRAYINIT_DONE18:%.*]], label [[OMP_ARRAYINIT_BODY14:%.*]] +// CHECK1: omp.arrayinit.body14: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi %struct.S* [ [[VLA12]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYINIT_BODY14]] ] +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST15]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP36]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYINIT_DONE18]], label [[OMP_ARRAYINIT_BODY14]] +// CHECK1: omp.arrayinit.done18: +// CHECK1-NEXT: [[TMP37:%.*]] = bitcast [10 x [4 x %struct.S]]* [[TMP10]] to %struct.S* +// CHECK1-NEXT: [[TMP38:%.*]] = ptrtoint %struct.S* [[TMP37]] to i64 +// CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint %struct.S* [[ARRAYIDX6]] to i64 +// CHECK1-NEXT: [[TMP40:%.*]] = sub i64 [[TMP38]], [[TMP39]] +// CHECK1-NEXT: [[TMP41:%.*]] = sdiv exact i64 [[TMP40]], ptrtoint (%struct.S* getelementptr ([[STRUCT_S]], %struct.S* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[VLA12]], i64 [[TMP41]] +// CHECK1-NEXT: [[TMP43:%.*]] = bitcast %struct.S* [[TMP42]] to [10 x [4 x %struct.S]]* +// CHECK1-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP45]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP46]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP41]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP47]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP42]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP48]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP43]], [[TMP44]] -// CHECK1-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP19:%.*]] = icmp sle i32 [[TMP49]], [[TMP50]] +// CHECK1-NEXT: br i1 [[CMP19]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP45]], 1 +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP51]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP46:%.*]] = mul nsw i64 1, [[TMP1]] -// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i32, i32* [[TMP21]], i64 [[TMP46]] -// CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP47]] to i64 -// CHECK1-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX23]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[ARRAYIDX24]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP48]], 1 -// CHECK1-NEXT: store i32 [[INC]], i32* [[ARRAYIDX24]], align 4 +// CHECK1-NEXT: [[TMP52:%.*]] = mul nsw i64 1, [[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, i32* [[TMP27]], i64 [[TMP52]] +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP53]] to i64 +// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX20]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP54:%.*]] = load i32, i32* [[ARRAYIDX21]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP54]], 1 +// CHECK1-NEXT: store i32 [[INC]], i32* [[ARRAYIDX21]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP49]], 1 -// CHECK1-NEXT: store i32 [[ADD25]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP55]], 1 +// CHECK1-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP50:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = load i32, i32* [[TMP50]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP51]]) -// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP53:%.*]] = bitcast i32* [[VLA7]] to i8* -// CHECK1-NEXT: store i8* [[TMP53]], i8** [[TMP52]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP55:%.*]] = inttoptr i64 [[TMP13]] to i8* -// CHECK1-NEXT: store i8* [[TMP55]], i8** [[TMP54]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP57:%.*]] = bitcast %struct.S* [[VLA15]] to i8* -// CHECK1-NEXT: store i8* [[TMP57]], i8** [[TMP56]], align 8 -// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP59:%.*]] = inttoptr i64 [[TMP28]] to i8* +// CHECK1-NEXT: [[TMP56:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = load i32, i32* [[TMP56]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP57]]) +// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP59:%.*]] = bitcast i32* [[VLA]] to i8* // CHECK1-NEXT: store i8* [[TMP59]], i8** [[TMP58]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = load i32, i32* [[TMP60]], align 4 -// CHECK1-NEXT: [[TMP62:%.*]] = bitcast [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP63:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP61]], i32 2, i64 32, i8* [[TMP62]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP63]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP61:%.*]] = inttoptr i64 [[TMP19]] to i8* +// CHECK1-NEXT: store i8* [[TMP61]], i8** [[TMP60]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP63:%.*]] = bitcast %struct.S* [[VLA12]] to i8* +// CHECK1-NEXT: store i8* [[TMP63]], i8** [[TMP62]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 +// CHECK1-NEXT: [[TMP65:%.*]] = inttoptr i64 [[TMP34]] to i8* +// CHECK1-NEXT: store i8* [[TMP65]], i8** [[TMP64]], align 8 +// CHECK1-NEXT: [[TMP66:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP67:%.*]] = load i32, i32* [[TMP66]], align 4 +// CHECK1-NEXT: [[TMP68:%.*]] = bitcast [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP69:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP67]], i32 2, i64 32, i8* [[TMP68]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP69]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr i32, i32* [[ARRAYIDX3]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[ARRAYIDX3]], [[TMP64]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE30:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr i32, i32* [[ARRAYIDX1]], i64 [[TMP19]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[ARRAYIDX1]], [[TMP70]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE27:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i32* [ [[VLA7]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST26:%.*]] = phi i32* [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT28:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP65:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST26]], align 4 -// CHECK1-NEXT: [[TMP66:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[TMP65]], [[TMP66]] -// CHECK1-NEXT: store i32 [[ADD27]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST26]], align 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT28]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i32* [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST23:%.*]] = phi i32* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP71:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST23]], align 4 +// CHECK1-NEXT: [[TMP72:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP71]], [[TMP72]] +// CHECK1-NEXT: store i32 [[ADD24]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST23]], align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT25]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE29:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT28]], [[TMP64]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE29]], label [[OMP_ARRAYCPY_DONE30]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done30: -// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX9]], i64 [[TMP28]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY31:%.*]] = icmp eq %struct.S* [[ARRAYIDX9]], [[TMP67]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY31]], label [[OMP_ARRAYCPY_DONE38:%.*]], label [[OMP_ARRAYCPY_BODY32:%.*]] -// CHECK1: omp.arraycpy.body32: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST33:%.*]] = phi %struct.S* [ [[VLA15]], [[OMP_ARRAYCPY_DONE30]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT36:%.*]], [[OMP_ARRAYCPY_BODY32]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST34:%.*]] = phi %struct.S* [ [[ARRAYIDX9]], [[OMP_ARRAYCPY_DONE30]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT35:%.*]], [[OMP_ARRAYCPY_BODY32]] ] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST34]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST33]]) -// CHECK1-NEXT: [[TMP68:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST34]] to i8* -// CHECK1-NEXT: [[TMP69:%.*]] = bitcast %struct.S* [[CALL]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP68]], i8* align 4 [[TMP69]], i64 4, i1 false) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT35]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST34]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT36]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST33]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE37:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT35]], [[TMP67]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE37]], label [[OMP_ARRAYCPY_DONE38]], label [[OMP_ARRAYCPY_BODY32]] -// CHECK1: omp.arraycpy.done38: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP61]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT25]], [[TMP70]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done27: +// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX6]], i64 [[TMP34]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY28:%.*]] = icmp eq %struct.S* [[ARRAYIDX6]], [[TMP73]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY28]], label [[OMP_ARRAYCPY_DONE35:%.*]], label [[OMP_ARRAYCPY_BODY29:%.*]] +// CHECK1: omp.arraycpy.body29: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST30:%.*]] = phi %struct.S* [ [[VLA12]], [[OMP_ARRAYCPY_DONE27]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT33:%.*]], [[OMP_ARRAYCPY_BODY29]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST31:%.*]] = phi %struct.S* [ [[ARRAYIDX6]], [[OMP_ARRAYCPY_DONE27]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], [[OMP_ARRAYCPY_BODY29]] ] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST31]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST30]]) +// CHECK1-NEXT: [[TMP74:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST31]] to i8* +// CHECK1-NEXT: [[TMP75:%.*]] = bitcast %struct.S* [[CALL]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP74]], i8* align 4 [[TMP75]], i64 4, i1 false) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST31]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT33]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST30]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE34:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP73]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_BODY29]] +// CHECK1: omp.arraycpy.done35: +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP67]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr i32, i32* [[ARRAYIDX3]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY39:%.*]] = icmp eq i32* [[ARRAYIDX3]], [[TMP70]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY39]], label [[OMP_ARRAYCPY_DONE46:%.*]], label [[OMP_ARRAYCPY_BODY40:%.*]] -// CHECK1: omp.arraycpy.body40: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST41:%.*]] = phi i32* [ [[VLA7]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT44:%.*]], [[OMP_ARRAYCPY_BODY40]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST42:%.*]] = phi i32* [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT43:%.*]], [[OMP_ARRAYCPY_BODY40]] ] -// CHECK1-NEXT: [[TMP71:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST41]], align 4 -// CHECK1-NEXT: [[TMP72:%.*]] = atomicrmw add i32* [[OMP_ARRAYCPY_DESTELEMENTPAST42]], i32 [[TMP71]] monotonic, align 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT43]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST42]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT44]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST41]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE45:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT43]], [[TMP70]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE45]], label [[OMP_ARRAYCPY_DONE46]], label [[OMP_ARRAYCPY_BODY40]] -// CHECK1: omp.arraycpy.done46: -// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX9]], i64 [[TMP28]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY47:%.*]] = icmp eq %struct.S* [[ARRAYIDX9]], [[TMP73]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY47]], label [[OMP_ARRAYCPY_DONE55:%.*]], label [[OMP_ARRAYCPY_BODY48:%.*]] -// CHECK1: omp.arraycpy.body48: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST49:%.*]] = phi %struct.S* [ [[VLA15]], [[OMP_ARRAYCPY_DONE46]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT53:%.*]], [[OMP_ARRAYCPY_BODY48]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST50:%.*]] = phi %struct.S* [ [[ARRAYIDX9]], [[OMP_ARRAYCPY_DONE46]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT52:%.*]], [[OMP_ARRAYCPY_BODY48]] ] -// CHECK1-NEXT: [[TMP74:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP75:%.*]] = load i32, i32* [[TMP74]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP75]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL51:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST50]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST49]]) -// CHECK1-NEXT: [[TMP76:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST50]] to i8* -// CHECK1-NEXT: [[TMP77:%.*]] = bitcast %struct.S* [[CALL51]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP76]], i8* align 4 [[TMP77]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP75]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT52]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST50]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT53]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST49]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE54:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT52]], [[TMP73]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE54]], label [[OMP_ARRAYCPY_DONE55]], label [[OMP_ARRAYCPY_BODY48]] -// CHECK1: omp.arraycpy.done55: +// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr i32, i32* [[ARRAYIDX1]], i64 [[TMP19]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY36:%.*]] = icmp eq i32* [[ARRAYIDX1]], [[TMP76]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY36]], label [[OMP_ARRAYCPY_DONE43:%.*]], label [[OMP_ARRAYCPY_BODY37:%.*]] +// CHECK1: omp.arraycpy.body37: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST38:%.*]] = phi i32* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT41:%.*]], [[OMP_ARRAYCPY_BODY37]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST39:%.*]] = phi i32* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT40:%.*]], [[OMP_ARRAYCPY_BODY37]] ] +// CHECK1-NEXT: [[TMP77:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST38]], align 4 +// CHECK1-NEXT: [[TMP78:%.*]] = atomicrmw add i32* [[OMP_ARRAYCPY_DESTELEMENTPAST39]], i32 [[TMP77]] monotonic, align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT40]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST39]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT41]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST38]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE42:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT40]], [[TMP76]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE42]], label [[OMP_ARRAYCPY_DONE43]], label [[OMP_ARRAYCPY_BODY37]] +// CHECK1: omp.arraycpy.done43: +// CHECK1-NEXT: [[TMP79:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX6]], i64 [[TMP34]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY44:%.*]] = icmp eq %struct.S* [[ARRAYIDX6]], [[TMP79]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY44]], label [[OMP_ARRAYCPY_DONE52:%.*]], label [[OMP_ARRAYCPY_BODY45:%.*]] +// CHECK1: omp.arraycpy.body45: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST46:%.*]] = phi %struct.S* [ [[VLA12]], [[OMP_ARRAYCPY_DONE43]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT50:%.*]], [[OMP_ARRAYCPY_BODY45]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST47:%.*]] = phi %struct.S* [ [[ARRAYIDX6]], [[OMP_ARRAYCPY_DONE43]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT49:%.*]], [[OMP_ARRAYCPY_BODY45]] ] +// CHECK1-NEXT: [[TMP80:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP81:%.*]] = load i32, i32* [[TMP80]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP81]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL48:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST47]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST46]]) +// CHECK1-NEXT: [[TMP82:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST47]] to i8* +// CHECK1-NEXT: [[TMP83:%.*]] = bitcast %struct.S* [[CALL48]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP82]], i8* align 4 [[TMP83]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP81]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT49]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST47]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT50]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST46]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE51:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT49]], [[TMP79]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE51]], label [[OMP_ARRAYCPY_DONE52]], label [[OMP_ARRAYCPY_BODY45]] +// CHECK1: omp.arraycpy.done52: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[VLA15]], i64 [[TMP28]] -// CHECK1-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %struct.S* [[VLA15]], [[TMP78]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE56:%.*]], label [[ARRAYDESTROY_BODY:%.*]] +// CHECK1-NEXT: [[TMP84:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[VLA12]], i64 [[TMP34]] +// CHECK1-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %struct.S* [[VLA12]], [[TMP84]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE53:%.*]], label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP78]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP84]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[VLA15]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE56]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done56: -// CHECK1-NEXT: [[TMP79:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP79]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[VLA12]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE53]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done53: +// CHECK1-NEXT: [[TMP85:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP85]]) // CHECK1-NEXT: ret void // // @@ -1332,14 +1394,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARR:%.*]], [10 x [4 x %struct.S]]* noundef nonnull align 4 dereferenceable(160) [[ARRS:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARRS_ADDR:%.*]] = alloca [10 x [4 x %struct.S]]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1348,205 +1407,207 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARRS4:%.*]] = alloca [10 x [4 x %struct.S]], align 16 +// CHECK1-NEXT: [[ARRS:%.*]] = alloca [10 x [4 x %struct.S]], align 16 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store i32* [[ARR]], i32** [[ARR_ADDR]], align 8 -// CHECK1-NEXT: store [10 x [4 x %struct.S]]* [[ARRS]], [10 x [4 x %struct.S]]** [[ARRS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load [10 x [4 x %struct.S]]*, [10 x [4 x %struct.S]]** [[ARRS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load [10 x [4 x %struct.S]]*, [10 x [4 x %struct.S]]** [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP0]], [[TMP1]] -// CHECK1-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -// CHECK1-NEXT: [[TMP6:%.*]] = udiv exact i64 [[TMP5]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP7:%.*]] = call i8* @llvm.stacksave() -// CHECK1-NEXT: store i8* [[TMP7]], i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA3:%.*]] = alloca i32, i64 [[TMP6]], align 16 -// CHECK1-NEXT: store i64 [[TMP6]], i64* [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[VLA3]], i64 [[TMP6]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[VLA3]], [[TMP8]] +// CHECK1-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP2]], [[TMP4]] +// CHECK1-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 +// CHECK1-NEXT: [[TMP11:%.*]] = udiv exact i64 [[TMP10]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP12:%.*]] = call i8* @llvm.stacksave() +// CHECK1-NEXT: store i8* [[TMP12]], i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP11]], align 16 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[VLA]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[VLA]], [[TMP13]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i32* [ [[VLA3]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i32* [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i32 0, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP13]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [4 x %struct.S]], [10 x [4 x %struct.S]]* [[ARRS4]], i32 0, i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY5:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP9]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY5]], label [[OMP_ARRAYINIT_DONE10:%.*]], label [[OMP_ARRAYINIT_BODY6:%.*]] -// CHECK1: omp.arrayinit.body6: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST7:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYINIT_BODY6]] ] -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST7]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST7]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYINIT_DONE10]], label [[OMP_ARRAYINIT_BODY6]] -// CHECK1: omp.arrayinit.done10: -// CHECK1-NEXT: [[LHS_BEGIN:%.*]] = bitcast [10 x [4 x %struct.S]]* [[TMP3]] to %struct.S* -// CHECK1-NEXT: [[RHS_BEGIN:%.*]] = bitcast [10 x [4 x %struct.S]]* [[ARRS4]] to %struct.S* -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 9 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [4 x %struct.S]], [10 x [4 x %struct.S]]* [[ARRS]], i32 0, i32 0, i32 0 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY1:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY1]], label [[OMP_ARRAYINIT_DONE6:%.*]], label [[OMP_ARRAYINIT_BODY2:%.*]] +// CHECK1: omp.arrayinit.body2: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYINIT_BODY2]] ] +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST3]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYINIT_DONE6]], label [[OMP_ARRAYINIT_BODY2]] +// CHECK1: omp.arrayinit.done6: +// CHECK1-NEXT: [[LHS_BEGIN:%.*]] = bitcast [10 x [4 x %struct.S]]* [[TMP8]] to %struct.S* +// CHECK1-NEXT: [[RHS_BEGIN:%.*]] = bitcast [10 x [4 x %struct.S]]* [[ARRS]] to %struct.S* +// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = mul nsw i64 1, [[TMP1]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[VLA3]], i64 [[TMP18]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARRAYIDX12]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[INC]], i32* [[ARRAYIDX12]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i64 [[TMP23]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK1-NEXT: store i32 [[INC]], i32* [[ARRAYIDX8]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP25:%.*]] = bitcast i32* [[VLA3]] to i8* -// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP6]] to i8* -// CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP29:%.*]] = bitcast %struct.S* [[RHS_BEGIN]] to i8* -// CHECK1-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP31]], i32 2, i64 24, i8* [[TMP32]], void (i8*, i8*)* @.omp.reduction.reduction_func.4, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP33]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP30:%.*]] = bitcast i32* [[VLA]] to i8* +// CHECK1-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP32:%.*]] = inttoptr i64 [[TMP11]] to i8* +// CHECK1-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[RHS_BEGIN]] to i8* +// CHECK1-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP38:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP36]], i32 2, i64 24, i8* [[TMP37]], void (i8*, i8*)* @.omp.reduction.reduction_func.4, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP38]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr i32, i32* [[TMP2]], i64 [[TMP6]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[TMP2]], [[TMP34]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr i32, i32* [[TMP6]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[TMP6]], [[TMP39]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i32* [ [[VLA3]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi i32* [ [[TMP2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST14]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] -// CHECK1-NEXT: store i32 [[ADD15]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST14]], align 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i32* [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi i32* [ [[TMP6]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST10]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] +// CHECK1-NEXT: store i32 [[ADD11]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST10]], align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT12]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP34]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done18: -// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[LHS_BEGIN]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq %struct.S* [[LHS_BEGIN]], [[TMP37]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY19]], label [[OMP_ARRAYCPY_DONE26:%.*]], label [[OMP_ARRAYCPY_BODY20:%.*]] -// CHECK1: omp.arraycpy.body20: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST21:%.*]] = phi %struct.S* [ [[RHS_BEGIN]], [[OMP_ARRAYCPY_DONE18]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY20]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST22:%.*]] = phi %struct.S* [ [[LHS_BEGIN]], [[OMP_ARRAYCPY_DONE18]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY20]] ] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST22]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST21]]) -// CHECK1-NEXT: [[TMP38:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST22]] to i8* -// CHECK1-NEXT: [[TMP39:%.*]] = bitcast %struct.S* [[CALL]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i64 4, i1 false) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT23]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT24]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE25:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP37]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_BODY20]] -// CHECK1: omp.arraycpy.done26: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP31]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT12]], [[TMP39]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done14: +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[LHS_BEGIN]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY15:%.*]] = icmp eq %struct.S* [[LHS_BEGIN]], [[TMP42]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY15]], label [[OMP_ARRAYCPY_DONE22:%.*]], label [[OMP_ARRAYCPY_BODY16:%.*]] +// CHECK1: omp.arraycpy.body16: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST17:%.*]] = phi %struct.S* [ [[RHS_BEGIN]], [[OMP_ARRAYCPY_DONE14]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY16]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST18:%.*]] = phi %struct.S* [ [[LHS_BEGIN]], [[OMP_ARRAYCPY_DONE14]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY16]] ] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST18]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST17]]) +// CHECK1-NEXT: [[TMP43:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST18]] to i8* +// CHECK1-NEXT: [[TMP44:%.*]] = bitcast %struct.S* [[CALL]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP43]], i8* align 4 [[TMP44]], i64 4, i1 false) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST18]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT20]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST17]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP42]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_BODY16]] +// CHECK1: omp.arraycpy.done22: +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP36]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr i32, i32* [[TMP2]], i64 [[TMP6]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY27:%.*]] = icmp eq i32* [[TMP2]], [[TMP40]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY27]], label [[OMP_ARRAYCPY_DONE34:%.*]], label [[OMP_ARRAYCPY_BODY28:%.*]] -// CHECK1: omp.arraycpy.body28: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST29:%.*]] = phi i32* [ [[VLA3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT32:%.*]], [[OMP_ARRAYCPY_BODY28]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST30:%.*]] = phi i32* [ [[TMP2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT31:%.*]], [[OMP_ARRAYCPY_BODY28]] ] -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST29]], align 4 -// CHECK1-NEXT: [[TMP42:%.*]] = atomicrmw add i32* [[OMP_ARRAYCPY_DESTELEMENTPAST30]], i32 [[TMP41]] monotonic, align 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT31]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST30]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT32]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST29]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE33:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT31]], [[TMP40]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE33]], label [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_BODY28]] -// CHECK1: omp.arraycpy.done34: -// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[LHS_BEGIN]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY35:%.*]] = icmp eq %struct.S* [[LHS_BEGIN]], [[TMP43]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY35]], label [[OMP_ARRAYCPY_DONE43:%.*]], label [[OMP_ARRAYCPY_BODY36:%.*]] -// CHECK1: omp.arraycpy.body36: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST37:%.*]] = phi %struct.S* [ [[RHS_BEGIN]], [[OMP_ARRAYCPY_DONE34]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT41:%.*]], [[OMP_ARRAYCPY_BODY36]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST38:%.*]] = phi %struct.S* [ [[LHS_BEGIN]], [[OMP_ARRAYCPY_DONE34]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT40:%.*]], [[OMP_ARRAYCPY_BODY36]] ] -// CHECK1-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP45]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL39:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST38]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST37]]) -// CHECK1-NEXT: [[TMP46:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST38]] to i8* -// CHECK1-NEXT: [[TMP47:%.*]] = bitcast %struct.S* [[CALL39]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP46]], i8* align 4 [[TMP47]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP45]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT40]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST38]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT41]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST37]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE42:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT40]], [[TMP43]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE42]], label [[OMP_ARRAYCPY_DONE43]], label [[OMP_ARRAYCPY_BODY36]] -// CHECK1: omp.arraycpy.done43: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP31]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr i32, i32* [[TMP6]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY23:%.*]] = icmp eq i32* [[TMP6]], [[TMP45]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY23]], label [[OMP_ARRAYCPY_DONE30:%.*]], label [[OMP_ARRAYCPY_BODY24:%.*]] +// CHECK1: omp.arraycpy.body24: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST25:%.*]] = phi i32* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT28:%.*]], [[OMP_ARRAYCPY_BODY24]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST26:%.*]] = phi i32* [ [[TMP6]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT27:%.*]], [[OMP_ARRAYCPY_BODY24]] ] +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = atomicrmw add i32* [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 [[TMP46]] monotonic, align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT27]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT28]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST25]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE29:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT27]], [[TMP45]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE29]], label [[OMP_ARRAYCPY_DONE30]], label [[OMP_ARRAYCPY_BODY24]] +// CHECK1: omp.arraycpy.done30: +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[LHS_BEGIN]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY31:%.*]] = icmp eq %struct.S* [[LHS_BEGIN]], [[TMP48]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY31]], label [[OMP_ARRAYCPY_DONE39:%.*]], label [[OMP_ARRAYCPY_BODY32:%.*]] +// CHECK1: omp.arraycpy.body32: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST33:%.*]] = phi %struct.S* [ [[RHS_BEGIN]], [[OMP_ARRAYCPY_DONE30]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT37:%.*]], [[OMP_ARRAYCPY_BODY32]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST34:%.*]] = phi %struct.S* [ [[LHS_BEGIN]], [[OMP_ARRAYCPY_DONE30]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT36:%.*]], [[OMP_ARRAYCPY_BODY32]] ] +// CHECK1-NEXT: [[TMP49:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[TMP49]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP50]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL35:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST34]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST33]]) +// CHECK1-NEXT: [[TMP51:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST34]] to i8* +// CHECK1-NEXT: [[TMP52:%.*]] = bitcast %struct.S* [[CALL35]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP51]], i8* align 4 [[TMP52]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP50]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT36]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST34]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT37]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST33]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE38:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT36]], [[TMP48]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE38]], label [[OMP_ARRAYCPY_DONE39]], label [[OMP_ARRAYCPY_BODY32]] +// CHECK1: omp.arraycpy.done39: +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP36]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN44:%.*]] = getelementptr inbounds [10 x [4 x %struct.S]], [10 x [4 x %struct.S]]* [[ARRS4]], i32 0, i32 0, i32 0 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN44]], i64 40 +// CHECK1-NEXT: [[ARRAY_BEGIN40:%.*]] = getelementptr inbounds [10 x [4 x %struct.S]], [10 x [4 x %struct.S]]* [[ARRS]], i32 0, i32 0, i32 0 +// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN40]], i64 40 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP48]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP53]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN44]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE45:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done45: -// CHECK1-NEXT: [[TMP49:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP49]]) -// CHECK1-NEXT: [[TMP50:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = load i32, i32* [[TMP50]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP51]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN40]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE41:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done41: +// CHECK1-NEXT: [[TMP54:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP54]]) +// CHECK1-NEXT: [[TMP55:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP56]]) // CHECK1-NEXT: ret void // // @@ -1610,156 +1671,156 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARR6:%.*]] = alloca [1 x [2 x i32]], align 4 +// CHECK1-NEXT: [[ARR:%.*]] = alloca [1 x [2 x i32]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store i32* [[ARR]], i32** [[ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[ARR_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = mul nsw i64 1, [[TMP1]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[TMP3]] -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = mul nsw i64 1, [[TMP1]] -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[TMP4]] -// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX4]], i64 1 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [1 x [2 x i32]], [1 x [2 x i32]]* [[ARR6]], i32 0, i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i32, i32* [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[ARRAY_BEGIN]], [[TMP5]] +// CHECK1-NEXT: [[TMP7:%.*]] = mul nsw i64 1, [[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i64 [[TMP7]] +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i64 0 +// CHECK1-NEXT: [[TMP8:%.*]] = mul nsw i64 1, [[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i64 [[TMP8]] +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX2]], i64 1 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [1 x [2 x i32]], [1 x [2 x i32]]* [[ARR]], i32 0, i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr i32, i32* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[ARRAY_BEGIN]], [[TMP9]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i32* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i32 0, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP6:%.*]] = ptrtoint i32* [[TMP2]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint i32* [[ARRAYIDX3]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = sub i64 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: [[TMP9:%.*]] = sdiv exact i64 [[TMP8]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast [1 x [2 x i32]]* [[ARR6]] to i32* -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[TMP10]], i64 [[TMP9]] -// CHECK1-NEXT: [[RHS_BEGIN:%.*]] = bitcast [1 x [2 x i32]]* [[ARR6]] to i32* -// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP13]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 9 +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint i32* [[TMP6]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint i32* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast [1 x [2 x i32]]* [[ARR]] to i32* +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr i32, i32* [[TMP14]], i64 [[TMP13]] +// CHECK1-NEXT: [[RHS_BEGIN:%.*]] = bitcast [1 x [2 x i32]]* [[ARR]] to i32* +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = mul nsw i64 1, [[TMP1]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i64 [[TMP20]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX8]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: store i32 [[INC]], i32* [[ARRAYIDX9]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[TMP15]], i64 [[TMP24]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX5]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK1-NEXT: store i32 [[INC]], i32* [[ARRAYIDX6]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP27:%.*]] = bitcast i32* [[RHS_BEGIN]] to i8* -// CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP31:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]], i32 1, i64 8, i8* [[TMP30]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP31]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP29]]) +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP31:%.*]] = bitcast i32* [[RHS_BEGIN]] to i8* +// CHECK1-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP35:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP33]], i32 1, i64 8, i8* [[TMP34]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP35]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr i32, i32* [[ARRAYIDX3]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[ARRAYIDX3]], [[TMP32]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE15:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr i32, i32* [[ARRAYIDX1]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[ARRAYIDX1]], [[TMP36]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST11:%.*]] = phi i32* [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT13:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT13]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi i32* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST8]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: store i32 [[ADD9]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST8]], align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT10]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE14:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT13]], [[TMP32]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_DONE15]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done15: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT10]], [[TMP36]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done12: +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP33]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr i32, i32* [[ARRAYIDX3]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY16:%.*]] = icmp eq i32* [[ARRAYIDX3]], [[TMP35]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY16]], label [[OMP_ARRAYCPY_DONE23:%.*]], label [[OMP_ARRAYCPY_BODY17:%.*]] -// CHECK1: omp.arraycpy.body17: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST18:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT21:%.*]], [[OMP_ARRAYCPY_BODY17]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST19:%.*]] = phi i32* [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY17]] ] -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST18]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = atomicrmw add i32* [[OMP_ARRAYCPY_DESTELEMENTPAST19]], i32 [[TMP36]] monotonic, align 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST19]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT21]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST18]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE22:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP35]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_DONE23]], label [[OMP_ARRAYCPY_BODY17]] -// CHECK1: omp.arraycpy.done23: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr i32, i32* [[ARRAYIDX1]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY13:%.*]] = icmp eq i32* [[ARRAYIDX1]], [[TMP39]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY13]], label [[OMP_ARRAYCPY_DONE20:%.*]], label [[OMP_ARRAYCPY_BODY14:%.*]] +// CHECK1: omp.arraycpy.body14: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST15:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY14]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi i32* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY14]] ] +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST15]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = atomicrmw add i32* [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 [[TMP40]] monotonic, align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT18]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST15]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE19:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP39]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_BODY14]] +// CHECK1: omp.arraycpy.done20: +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP33]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP39]]) +// CHECK1-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP43]]) // CHECK1-NEXT: ret void // // @@ -1799,11 +1860,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S*** noundef nonnull align 8 dereferenceable(8) [[VAR2:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VAR2_ADDR:%.*]] = alloca %struct.S***, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1818,162 +1879,164 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.S*** [[VAR2]], %struct.S**** [[VAR2_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S***, %struct.S**** [[VAR2_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S***, %struct.S**** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S**, %struct.S*** [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds %struct.S*, %struct.S** [[TMP1]], i64 0 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[TMP2]], i64 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S**, %struct.S*** [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds %struct.S*, %struct.S** [[TMP3]], i64 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load %struct.S*, %struct.S** [[ARRAYIDX2]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP4]], i64 6 -// CHECK1-NEXT: [[TMP5:%.*]] = ptrtoint %struct.S* [[ARRAYIDX3]] to i64 -// CHECK1-NEXT: [[TMP6:%.*]] = ptrtoint %struct.S* [[ARRAYIDX1]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = sub i64 [[TMP5]], [[TMP6]] -// CHECK1-NEXT: [[TMP8:%.*]] = sdiv exact i64 [[TMP7]], ptrtoint (%struct.S* getelementptr ([[STRUCT_S]], %struct.S* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP9:%.*]] = add nuw i64 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], ptrtoint (%struct.S* getelementptr ([[STRUCT_S]], %struct.S* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = call i8* @llvm.stacksave() -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca [[STRUCT_S]], i64 [[TMP9]], align 16 -// CHECK1-NEXT: store i64 [[TMP9]], i64* [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[VLA]], i64 [[TMP9]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[VLA]], [[TMP12]] +// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S**, %struct.S*** [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds %struct.S*, %struct.S** [[TMP3]], i64 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load %struct.S*, %struct.S** [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[TMP4]], i64 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load %struct.S**, %struct.S*** [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds %struct.S*, %struct.S** [[TMP5]], i64 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.S*, %struct.S** [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP6]], i64 6 +// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint %struct.S* [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint %struct.S* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (%struct.S* getelementptr ([[STRUCT_S]], %struct.S* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP11:%.*]] = add nuw i64 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], ptrtoint (%struct.S* getelementptr ([[STRUCT_S]], %struct.S* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP13:%.*]] = call i8* @llvm.stacksave() +// CHECK1-NEXT: store i8* [[TMP13]], i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca [[STRUCT_S]], i64 [[TMP11]], align 16 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[VLA]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[VLA]], [[TMP14]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP13:%.*]] = load %struct.S**, %struct.S*** [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load %struct.S*, %struct.S** [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = ptrtoint %struct.S* [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = ptrtoint %struct.S* [[ARRAYIDX1]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = sub i64 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: [[TMP18:%.*]] = sdiv exact i64 [[TMP17]], ptrtoint (%struct.S* getelementptr ([[STRUCT_S]], %struct.S* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[VLA]], i64 [[TMP18]] +// CHECK1-NEXT: [[TMP15:%.*]] = load %struct.S**, %struct.S*** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load %struct.S*, %struct.S** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint %struct.S* [[TMP16]] to i64 +// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint %struct.S* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (%struct.S* getelementptr ([[STRUCT_S]], %struct.S* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[VLA]], i64 [[TMP20]] // CHECK1-NEXT: store %struct.S** [[_TMP5]], %struct.S*** [[_TMP4]], align 8 -// CHECK1-NEXT: store %struct.S* [[TMP19]], %struct.S** [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP22]], 9 +// CHECK1-NEXT: store %struct.S* [[TMP21]], %struct.S** [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[VLA]] to i8* -// CHECK1-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP34:%.*]] = inttoptr i64 [[TMP9]] to i8* +// CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[VLA]] to i8* // CHECK1-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP38:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP36]], i32 1, i64 16, i8* [[TMP37]], void (i8*, i8*)* @.omp.reduction.reduction_func.8, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP38]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP36:%.*]] = inttoptr i64 [[TMP11]] to i8* +// CHECK1-NEXT: store i8* [[TMP36]], i8** [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP40:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP38]], i32 1, i64 16, i8* [[TMP39]], void (i8*, i8*)* @.omp.reduction.reduction_func.8, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP40]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX1]], i64 [[TMP9]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAYIDX1]], [[TMP39]] +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX1]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAYIDX1]], [[TMP41]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi %struct.S* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST8]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK1-NEXT: [[TMP40:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST8]] to i8* -// CHECK1-NEXT: [[TMP41:%.*]] = bitcast %struct.S* [[CALL]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP40]], i8* align 4 [[TMP41]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP42:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST8]] to i8* +// CHECK1-NEXT: [[TMP43:%.*]] = bitcast %struct.S* [[CALL]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP42]], i8* align 4 [[TMP43]], i64 4, i1 false) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE10:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP39]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE10:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP41]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done11: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP36]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP38]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX1]], i64 [[TMP9]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY12:%.*]] = icmp eq %struct.S* [[ARRAYIDX1]], [[TMP42]] +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX1]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY12:%.*]] = icmp eq %struct.S* [[ARRAYIDX1]], [[TMP44]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY12]], label [[OMP_ARRAYCPY_DONE20:%.*]], label [[OMP_ARRAYCPY_BODY13:%.*]] // CHECK1: omp.arraycpy.body13: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST14:%.*]] = phi %struct.S* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY13]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi %struct.S* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY13]] ] -// CHECK1-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP44]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP46]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) // CHECK1-NEXT: [[CALL16:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST15]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST14]]) -// CHECK1-NEXT: [[TMP45:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST15]] to i8* -// CHECK1-NEXT: [[TMP46:%.*]] = bitcast %struct.S* [[CALL16]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP45]], i8* align 4 [[TMP46]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP44]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP47:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST15]] to i8* +// CHECK1-NEXT: [[TMP48:%.*]] = bitcast %struct.S* [[CALL16]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP47]], i8* align 4 [[TMP48]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP46]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT18]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST14]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE19:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP42]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE19:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP44]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_BODY13]] // CHECK1: omp.arraycpy.done20: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP36]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP38]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[VLA]], i64 [[TMP9]] -// CHECK1-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %struct.S* [[VLA]], [[TMP47]] +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[VLA]], i64 [[TMP11]] +// CHECK1-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %struct.S* [[VLA]], [[TMP49]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP47]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP49]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[VLA]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE21]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done21: -// CHECK1-NEXT: [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP48]]) -// CHECK1-NEXT: [[TMP49:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[TMP49]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP50]]) +// CHECK1-NEXT: [[TMP50:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP50]]) +// CHECK1-NEXT: [[TMP51:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[TMP51]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP52]]) // CHECK1-NEXT: ret void // // @@ -2016,168 +2079,170 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S*** noundef nonnull align 8 dereferenceable(8) [[VAR2:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VAR2_ADDR:%.*]] = alloca %struct.S***, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VAR24:%.*]] = alloca [1 x [6 x %struct.S]], align 16 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca %struct.S**, align 8 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca %struct.S*, align 8 +// CHECK1-NEXT: [[VAR2:%.*]] = alloca [1 x [6 x %struct.S]], align 16 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca %struct.S**, align 8 +// CHECK1-NEXT: [[_TMP5:%.*]] = alloca %struct.S*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.S*** [[VAR2]], %struct.S**** [[VAR2_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S***, %struct.S**** [[VAR2_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S***, %struct.S**** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S**, %struct.S*** [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds %struct.S*, %struct.S** [[TMP1]], i64 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[TMP2]], i64 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S**, %struct.S*** [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds %struct.S*, %struct.S** [[TMP3]], i64 1 -// CHECK1-NEXT: [[TMP4:%.*]] = load %struct.S*, %struct.S** [[ARRAYIDX2]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP4]], i64 6 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [1 x [6 x %struct.S]], [1 x [6 x %struct.S]]* [[VAR24]], i32 0, i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 6 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP5]] +// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S**, %struct.S*** [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds %struct.S*, %struct.S** [[TMP3]], i64 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load %struct.S*, %struct.S** [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[TMP4]], i64 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load %struct.S**, %struct.S*** [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds %struct.S*, %struct.S** [[TMP5]], i64 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.S*, %struct.S** [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP6]], i64 6 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [1 x [6 x %struct.S]], [1 x [6 x %struct.S]]* [[VAR2]], i32 0, i32 0, i32 0 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 6 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP7]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.S**, %struct.S*** [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint %struct.S* [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint %struct.S* [[ARRAYIDX1]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = sub i64 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: [[TMP11:%.*]] = sdiv exact i64 [[TMP10]], ptrtoint (%struct.S* getelementptr ([[STRUCT_S]], %struct.S* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast [1 x [6 x %struct.S]]* [[VAR24]] to %struct.S* -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[TMP12]], i64 [[TMP11]] -// CHECK1-NEXT: store %struct.S** [[_TMP6]], %struct.S*** [[_TMP5]], align 8 -// CHECK1-NEXT: store %struct.S* [[TMP13]], %struct.S** [[_TMP6]], align 8 -// CHECK1-NEXT: [[RHS_BEGIN:%.*]] = bitcast [1 x [6 x %struct.S]]* [[VAR24]] to %struct.S* -// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 9 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S**, %struct.S*** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load %struct.S*, %struct.S** [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint %struct.S* [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint %struct.S* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (%struct.S* getelementptr ([[STRUCT_S]], %struct.S* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast [1 x [6 x %struct.S]]* [[VAR2]] to %struct.S* +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[TMP14]], i64 [[TMP13]] +// CHECK1-NEXT: store %struct.S** [[_TMP5]], %struct.S*** [[_TMP4]], align 8 +// CHECK1-NEXT: store %struct.S* [[TMP15]], %struct.S** [[_TMP5]], align 8 +// CHECK1-NEXT: [[RHS_BEGIN:%.*]] = bitcast [1 x [6 x %struct.S]]* [[VAR2]] to %struct.S* +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP26:%.*]] = bitcast %struct.S* [[RHS_BEGIN]] to i8* -// CHECK1-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, i8* [[TMP29]], void (i8*, i8*)* @.omp.reduction.reduction_func.10, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP30]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP28:%.*]] = bitcast %struct.S* [[RHS_BEGIN]] to i8* +// CHECK1-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP30]], i32 1, i64 8, i8* [[TMP31]], void (i8*, i8*)* @.omp.reduction.reduction_func.10, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP32]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX1]], i64 6 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAYIDX1]], [[TMP31]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX1]], i64 6 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAYIDX1]], [[TMP33]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST9:%.*]] = phi %struct.S* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST9]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK1-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST9]] to i8* -// CHECK1-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[CALL]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i64 4, i1 false) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT10]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi %struct.S* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST8]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST8]] to i8* +// CHECK1-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[CALL]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i64 4, i1 false) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT10]], [[TMP31]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done12: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP28]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE10:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP33]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done11: +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX1]], i64 6 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY13:%.*]] = icmp eq %struct.S* [[ARRAYIDX1]], [[TMP34]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY13]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY14:%.*]] -// CHECK1: omp.arraycpy.body14: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST15:%.*]] = phi %struct.S* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY14]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi %struct.S* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY14]] ] -// CHECK1-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP36]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL17:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST16]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST15]]) -// CHECK1-NEXT: [[TMP37:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST16]] to i8* -// CHECK1-NEXT: [[TMP38:%.*]] = bitcast %struct.S* [[CALL17]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP37]], i8* align 4 [[TMP38]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP36]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT18]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT19]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST15]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT18]], [[TMP34]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY14]] -// CHECK1: omp.arraycpy.done21: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP28]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX1]], i64 6 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY12:%.*]] = icmp eq %struct.S* [[ARRAYIDX1]], [[TMP36]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY12]], label [[OMP_ARRAYCPY_DONE20:%.*]], label [[OMP_ARRAYCPY_BODY13:%.*]] +// CHECK1: omp.arraycpy.body13: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST14:%.*]] = phi %struct.S* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY13]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi %struct.S* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY13]] ] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP38]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL16:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST15]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST14]]) +// CHECK1-NEXT: [[TMP39:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST15]] to i8* +// CHECK1-NEXT: [[TMP40:%.*]] = bitcast %struct.S* [[CALL16]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP39]], i8* align 4 [[TMP40]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP38]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT18]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST14]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE19:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP36]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_BODY13]] +// CHECK1: omp.arraycpy.done20: +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN22:%.*]] = getelementptr inbounds [1 x [6 x %struct.S]], [1 x [6 x %struct.S]]* [[VAR24]], i32 0, i32 0, i32 0 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN22]], i64 6 +// CHECK1-NEXT: [[ARRAY_BEGIN21:%.*]] = getelementptr inbounds [1 x [6 x %struct.S]], [1 x [6 x %struct.S]]* [[VAR2]], i32 0, i32 0, i32 0 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN21]], i64 6 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP39]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP41]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN22]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE23:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done23: -// CHECK1-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP41]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN21]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE22:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done22: +// CHECK1-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP43]]) // CHECK1-NEXT: ret void // // @@ -2217,168 +2282,170 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S*** noundef nonnull align 8 dereferenceable(8) [[VAR2:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VAR2_ADDR:%.*]] = alloca %struct.S***, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VAR24:%.*]] = alloca [1 x [6 x %struct.S]], align 16 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca %struct.S**, align 8 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca %struct.S*, align 8 +// CHECK1-NEXT: [[VAR2:%.*]] = alloca [1 x [6 x %struct.S]], align 16 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca %struct.S**, align 8 +// CHECK1-NEXT: [[_TMP5:%.*]] = alloca %struct.S*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.S*** [[VAR2]], %struct.S**** [[VAR2_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S***, %struct.S**** [[VAR2_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S***, %struct.S**** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S**, %struct.S*** [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds %struct.S*, %struct.S** [[TMP1]], i64 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[TMP2]], i64 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S**, %struct.S*** [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds %struct.S*, %struct.S** [[TMP3]], i64 1 -// CHECK1-NEXT: [[TMP4:%.*]] = load %struct.S*, %struct.S** [[ARRAYIDX2]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP4]], i64 6 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [1 x [6 x %struct.S]], [1 x [6 x %struct.S]]* [[VAR24]], i32 0, i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 6 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP5]] +// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S**, %struct.S*** [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds %struct.S*, %struct.S** [[TMP3]], i64 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load %struct.S*, %struct.S** [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[TMP4]], i64 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load %struct.S**, %struct.S*** [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds %struct.S*, %struct.S** [[TMP5]], i64 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.S*, %struct.S** [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP6]], i64 6 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [1 x [6 x %struct.S]], [1 x [6 x %struct.S]]* [[VAR2]], i32 0, i32 0, i32 0 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 6 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP7]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.S**, %struct.S*** [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint %struct.S* [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint %struct.S* [[ARRAYIDX1]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = sub i64 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: [[TMP11:%.*]] = sdiv exact i64 [[TMP10]], ptrtoint (%struct.S* getelementptr ([[STRUCT_S]], %struct.S* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast [1 x [6 x %struct.S]]* [[VAR24]] to %struct.S* -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[TMP12]], i64 [[TMP11]] -// CHECK1-NEXT: store %struct.S** [[_TMP6]], %struct.S*** [[_TMP5]], align 8 -// CHECK1-NEXT: store %struct.S* [[TMP13]], %struct.S** [[_TMP6]], align 8 -// CHECK1-NEXT: [[RHS_BEGIN:%.*]] = bitcast [1 x [6 x %struct.S]]* [[VAR24]] to %struct.S* -// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 9 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S**, %struct.S*** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load %struct.S*, %struct.S** [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint %struct.S* [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint %struct.S* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (%struct.S* getelementptr ([[STRUCT_S]], %struct.S* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast [1 x [6 x %struct.S]]* [[VAR2]] to %struct.S* +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[TMP14]], i64 [[TMP13]] +// CHECK1-NEXT: store %struct.S** [[_TMP5]], %struct.S*** [[_TMP4]], align 8 +// CHECK1-NEXT: store %struct.S* [[TMP15]], %struct.S** [[_TMP5]], align 8 +// CHECK1-NEXT: [[RHS_BEGIN:%.*]] = bitcast [1 x [6 x %struct.S]]* [[VAR2]] to %struct.S* +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP26:%.*]] = bitcast %struct.S* [[RHS_BEGIN]] to i8* -// CHECK1-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, i8* [[TMP29]], void (i8*, i8*)* @.omp.reduction.reduction_func.12, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP30]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP28:%.*]] = bitcast %struct.S* [[RHS_BEGIN]] to i8* +// CHECK1-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP30]], i32 1, i64 8, i8* [[TMP31]], void (i8*, i8*)* @.omp.reduction.reduction_func.12, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP32]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX1]], i64 6 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAYIDX1]], [[TMP31]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX1]], i64 6 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAYIDX1]], [[TMP33]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST9:%.*]] = phi %struct.S* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST9]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK1-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST9]] to i8* -// CHECK1-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[CALL]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i64 4, i1 false) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT10]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi %struct.S* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST8]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST8]] to i8* +// CHECK1-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[CALL]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i64 4, i1 false) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT10]], [[TMP31]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done12: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP28]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE10:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP33]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done11: +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX1]], i64 6 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY13:%.*]] = icmp eq %struct.S* [[ARRAYIDX1]], [[TMP34]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY13]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY14:%.*]] -// CHECK1: omp.arraycpy.body14: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST15:%.*]] = phi %struct.S* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY14]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi %struct.S* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY14]] ] -// CHECK1-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP36]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL17:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST16]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST15]]) -// CHECK1-NEXT: [[TMP37:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST16]] to i8* -// CHECK1-NEXT: [[TMP38:%.*]] = bitcast %struct.S* [[CALL17]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP37]], i8* align 4 [[TMP38]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP36]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT18]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT19]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST15]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT18]], [[TMP34]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY14]] -// CHECK1: omp.arraycpy.done21: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP28]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX1]], i64 6 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY12:%.*]] = icmp eq %struct.S* [[ARRAYIDX1]], [[TMP36]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY12]], label [[OMP_ARRAYCPY_DONE20:%.*]], label [[OMP_ARRAYCPY_BODY13:%.*]] +// CHECK1: omp.arraycpy.body13: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST14:%.*]] = phi %struct.S* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY13]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi %struct.S* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY13]] ] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP38]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL16:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST15]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST14]]) +// CHECK1-NEXT: [[TMP39:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST15]] to i8* +// CHECK1-NEXT: [[TMP40:%.*]] = bitcast %struct.S* [[CALL16]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP39]], i8* align 4 [[TMP40]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP38]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT18]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST14]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE19:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP36]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_BODY13]] +// CHECK1: omp.arraycpy.done20: +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN22:%.*]] = getelementptr inbounds [1 x [6 x %struct.S]], [1 x [6 x %struct.S]]* [[VAR24]], i32 0, i32 0, i32 0 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN22]], i64 6 +// CHECK1-NEXT: [[ARRAY_BEGIN21:%.*]] = getelementptr inbounds [1 x [6 x %struct.S]], [1 x [6 x %struct.S]]* [[VAR2]], i32 0, i32 0, i32 0 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN21]], i64 6 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP39]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP41]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN22]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE23:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done23: -// CHECK1-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP41]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN21]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE22:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done22: +// CHECK1-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP43]]) // CHECK1-NEXT: ret void // // @@ -2418,117 +2485,119 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S*** noundef nonnull align 8 dereferenceable(8) [[VAR2:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VAR2_ADDR:%.*]] = alloca %struct.S***, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VAR24:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca %struct.S**, align 8 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca %struct.S*, align 8 +// CHECK1-NEXT: [[VAR2:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca %struct.S**, align 8 +// CHECK1-NEXT: [[_TMP5:%.*]] = alloca %struct.S*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.S*** [[VAR2]], %struct.S**** [[VAR2_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S***, %struct.S**** [[VAR2_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S***, %struct.S**** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S**, %struct.S*** [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds %struct.S*, %struct.S** [[TMP1]], i64 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP2]], i64 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S**, %struct.S*** [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds %struct.S*, %struct.S** [[TMP3]], i64 1 -// CHECK1-NEXT: [[TMP4:%.*]] = load %struct.S*, %struct.S** [[ARRAYIDX2]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP4]], i64 1 -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR24]]) -// CHECK1-NEXT: [[TMP5:%.*]] = load %struct.S**, %struct.S*** [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.S*, %struct.S** [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint %struct.S* [[TMP6]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint %struct.S* [[ARRAYIDX1]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (%struct.S* getelementptr ([[STRUCT_S]], %struct.S* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[VAR24]], i64 [[TMP10]] -// CHECK1-NEXT: store %struct.S** [[_TMP6]], %struct.S*** [[_TMP5]], align 8 -// CHECK1-NEXT: store %struct.S* [[TMP11]], %struct.S** [[_TMP6]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP13]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 9 +// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S**, %struct.S*** [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds %struct.S*, %struct.S** [[TMP3]], i64 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load %struct.S*, %struct.S** [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP4]], i64 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load %struct.S**, %struct.S*** [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds %struct.S*, %struct.S** [[TMP5]], i64 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.S*, %struct.S** [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP6]], i64 1 +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR2]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load %struct.S**, %struct.S*** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint %struct.S* [[TMP8]] to i64 +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint %struct.S* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP12:%.*]] = sdiv exact i64 [[TMP11]], ptrtoint (%struct.S* getelementptr ([[STRUCT_S]], %struct.S* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[VAR2]], i64 [[TMP12]] +// CHECK1-NEXT: store %struct.S** [[_TMP5]], %struct.S*** [[_TMP4]], align 8 +// CHECK1-NEXT: store %struct.S* [[TMP13]], %struct.S** [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP13]]) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP22:%.*]] = bitcast %struct.S* [[VAR24]] to i8* -// CHECK1-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], i32 1, i64 8, i8* [[TMP23]], void (i8*, i8*)* @.omp.reduction.reduction_func.14, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]]) +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP24:%.*]] = bitcast %struct.S* [[VAR2]] to i8* +// CHECK1-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP15]], i32 1, i64 8, i8* [[TMP25]], void (i8*, i8*)* @.omp.reduction.reduction_func.14, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP26]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX1]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = bitcast %struct.S* [[ARRAYIDX1]] to i8* -// CHECK1-NEXT: [[TMP26:%.*]] = bitcast %struct.S* [[CALL]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP25]], i8* align 4 [[TMP26]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] -// CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL9:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX1]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR24]]) +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX1]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR2]]) // CHECK1-NEXT: [[TMP27:%.*]] = bitcast %struct.S* [[ARRAYIDX1]] to i8* -// CHECK1-NEXT: [[TMP28:%.*]] = bitcast %struct.S* [[CALL9]] to i8* +// CHECK1-NEXT: [[TMP28:%.*]] = bitcast %struct.S* [[CALL]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP27]], i8* align 4 [[TMP28]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP15]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK1: .omp.reduction.case2: +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL8:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX1]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR2]]) +// CHECK1-NEXT: [[TMP29:%.*]] = bitcast %struct.S* [[ARRAYIDX1]] to i8* +// CHECK1-NEXT: [[TMP30:%.*]] = bitcast %struct.S* [[CALL8]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP29]], i8* align 4 [[TMP30]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP15]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR24]]) #[[ATTR5]] -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP13]]) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR2]]) #[[ATTR5]] +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP15]]) // CHECK1-NEXT: ret void // // @@ -2557,158 +2626,160 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [5 x %struct.S]* noundef nonnull align 4 dereferenceable(20) [[VVAR2:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VVAR2_ADDR:%.*]] = alloca [5 x %struct.S]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VVAR22:%.*]] = alloca [5 x %struct.S], align 16 +// CHECK1-NEXT: [[VVAR2:%.*]] = alloca [5 x %struct.S], align 16 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [5 x %struct.S]* [[VVAR2]], [5 x %struct.S]** [[VVAR2_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [5 x %struct.S]*, [5 x %struct.S]** [[VVAR2_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [5 x %struct.S]*, [5 x %struct.S]** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x %struct.S], [5 x %struct.S]* [[TMP0]], i64 0, i64 0 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [5 x %struct.S], [5 x %struct.S]* [[TMP0]], i64 0, i64 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S], [5 x %struct.S]* [[VVAR22]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 5 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP1]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x %struct.S], [5 x %struct.S]* [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [5 x %struct.S], [5 x %struct.S]* [[TMP2]], i64 0, i64 4 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S], [5 x %struct.S]* [[VVAR2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 5 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP3]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP1]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP2:%.*]] = bitcast [5 x %struct.S]* [[TMP0]] to %struct.S* -// CHECK1-NEXT: [[TMP3:%.*]] = ptrtoint %struct.S* [[TMP2]] to i64 -// CHECK1-NEXT: [[TMP4:%.*]] = ptrtoint %struct.S* [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]] -// CHECK1-NEXT: [[TMP6:%.*]] = sdiv exact i64 [[TMP5]], ptrtoint (%struct.S* getelementptr ([[STRUCT_S]], %struct.S* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast [5 x %struct.S]* [[VVAR22]] to %struct.S* -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[TMP7]], i64 [[TMP6]] -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.S* [[TMP8]] to [5 x %struct.S]* -// CHECK1-NEXT: [[RHS_BEGIN:%.*]] = bitcast [5 x %struct.S]* [[VVAR22]] to %struct.S* -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 9 +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast [5 x %struct.S]* [[TMP2]] to %struct.S* +// CHECK1-NEXT: [[TMP5:%.*]] = ptrtoint %struct.S* [[TMP4]] to i64 +// CHECK1-NEXT: [[TMP6:%.*]] = ptrtoint %struct.S* [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP7:%.*]] = sub i64 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = sdiv exact i64 [[TMP7]], ptrtoint (%struct.S* getelementptr ([[STRUCT_S]], %struct.S* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [5 x %struct.S]* [[VVAR2]] to %struct.S* +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[TMP9]], i64 [[TMP8]] +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.S* [[TMP10]] to [5 x %struct.S]* +// CHECK1-NEXT: [[RHS_BEGIN:%.*]] = bitcast [5 x %struct.S]* [[VVAR2]] to %struct.S* +// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP13]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP22:%.*]] = bitcast %struct.S* [[RHS_BEGIN]] to i8* -// CHECK1-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP24]], i32 1, i64 8, i8* [[TMP25]], void (i8*, i8*)* @.omp.reduction.reduction_func.16, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP26]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP24:%.*]] = bitcast %struct.S* [[RHS_BEGIN]] to i8* +// CHECK1-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]], i32 1, i64 8, i8* [[TMP27]], void (i8*, i8*)* @.omp.reduction.reduction_func.16, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP28]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX]], i64 5 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAYIDX]], [[TMP27]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX]], i64 5 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAYIDX]], [[TMP29]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST5:%.*]] = phi %struct.S* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT6:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST5]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK1-NEXT: [[TMP28:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST5]] to i8* -// CHECK1-NEXT: [[TMP29:%.*]] = bitcast %struct.S* [[CALL]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i64 4, i1 false) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT6]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST5]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST4:%.*]] = phi %struct.S* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT5:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST4]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK1-NEXT: [[TMP30:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST4]] to i8* +// CHECK1-NEXT: [[TMP31:%.*]] = bitcast %struct.S* [[CALL]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i64 4, i1 false) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT5]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST4]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE7:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT6]], [[TMP27]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done8: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP24]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE6:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP29]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done7: +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX]], i64 5 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY9:%.*]] = icmp eq %struct.S* [[ARRAYIDX]], [[TMP30]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY9]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY10:%.*]] -// CHECK1: omp.arraycpy.body10: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST11:%.*]] = phi %struct.S* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY10]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST12:%.*]] = phi %struct.S* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT14:%.*]], [[OMP_ARRAYCPY_BODY10]] ] -// CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP32]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL13:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST12]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST11]]) -// CHECK1-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST12]] to i8* -// CHECK1-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[CALL13]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP32]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT14]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST12]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT15]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST11]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT14]], [[TMP30]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY10]] -// CHECK1: omp.arraycpy.done17: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP24]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX]], i64 5 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY8:%.*]] = icmp eq %struct.S* [[ARRAYIDX]], [[TMP32]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY8]], label [[OMP_ARRAYCPY_DONE16:%.*]], label [[OMP_ARRAYCPY_BODY9:%.*]] +// CHECK1: omp.arraycpy.body9: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST10:%.*]] = phi %struct.S* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT14:%.*]], [[OMP_ARRAYCPY_BODY9]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST11:%.*]] = phi %struct.S* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT13:%.*]], [[OMP_ARRAYCPY_BODY9]] ] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP34]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL12:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST11]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST10]]) +// CHECK1-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST11]] to i8* +// CHECK1-NEXT: [[TMP36:%.*]] = bitcast %struct.S* [[CALL12]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP34]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT13]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT14]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST10]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE15:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT13]], [[TMP32]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE15]], label [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_BODY9]] +// CHECK1: omp.arraycpy.done16: +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN18:%.*]] = getelementptr inbounds [5 x %struct.S], [5 x %struct.S]* [[VVAR22]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN18]], i64 5 +// CHECK1-NEXT: [[ARRAY_BEGIN17:%.*]] = getelementptr inbounds [5 x %struct.S], [5 x %struct.S]* [[VVAR2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN17]], i64 5 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP35]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP37]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN18]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE19:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done19: -// CHECK1-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP36]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP37]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN17]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE18:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done18: +// CHECK1-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP39]]) // CHECK1-NEXT: ret void // // @@ -2748,11 +2819,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..17 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [4 x %struct.S]* noundef nonnull align 4 dereferenceable(16) [[VAR3:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VAR3_ADDR:%.*]] = alloca [4 x %struct.S]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca [4 x %struct.S]*, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca [4 x %struct.S]*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2761,155 +2832,157 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VAR34:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca [4 x %struct.S]*, align 8 +// CHECK1-NEXT: [[VAR3:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca [4 x %struct.S]*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [4 x %struct.S]* [[VAR3]], [4 x %struct.S]** [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: store [4 x %struct.S]* [[TMP0]], [4 x %struct.S]** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[TMP]], align 8 -// CHECK1-NEXT: store [4 x %struct.S]* [[TMP1]], [4 x %struct.S]** [[_TMP1]], align 8 +// CHECK1-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[TMP1]], align 8 +// CHECK1-NEXT: store [4 x %struct.S]* [[TMP2]], [4 x %struct.S]** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[TMP]], align 8 +// CHECK1-NEXT: store [4 x %struct.S]* [[TMP3]], [4 x %struct.S]** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[_TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x %struct.S], [4 x %struct.S]* [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[_TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x %struct.S], [4 x %struct.S]* [[TMP3]], i64 0, i64 2 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[VAR34]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP4]] +// CHECK1-NEXT: [[TMP4:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[_TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x %struct.S], [4 x %struct.S]* [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[_TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x %struct.S], [4 x %struct.S]* [[TMP5]], i64 0, i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[VAR3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP5:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast [4 x %struct.S]* [[TMP5]] to %struct.S* -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint %struct.S* [[TMP6]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint %struct.S* [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (%struct.S* getelementptr ([[STRUCT_S]], %struct.S* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [2 x %struct.S]* [[VAR34]] to %struct.S* -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[TMP11]], i64 [[TMP10]] -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[TMP12]] to [4 x %struct.S]* -// CHECK1-NEXT: store [4 x %struct.S]* [[TMP13]], [4 x %struct.S]** [[_TMP5]], align 8 -// CHECK1-NEXT: [[RHS_BEGIN:%.*]] = bitcast [2 x %struct.S]* [[VAR34]] to %struct.S* -// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 9 +// CHECK1-NEXT: [[TMP7:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast [4 x %struct.S]* [[TMP7]] to %struct.S* +// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint %struct.S* [[TMP8]] to i64 +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint %struct.S* [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP12:%.*]] = sdiv exact i64 [[TMP11]], ptrtoint (%struct.S* getelementptr ([[STRUCT_S]], %struct.S* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S]* [[VAR3]] to %struct.S* +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[TMP13]], i64 [[TMP12]] +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast %struct.S* [[TMP14]] to [4 x %struct.S]* +// CHECK1-NEXT: store [4 x %struct.S]* [[TMP15]], [4 x %struct.S]** [[_TMP4]], align 8 +// CHECK1-NEXT: [[RHS_BEGIN:%.*]] = bitcast [2 x %struct.S]* [[VAR3]] to %struct.S* +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP26:%.*]] = bitcast %struct.S* [[RHS_BEGIN]] to i8* -// CHECK1-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, i8* [[TMP29]], void (i8*, i8*)* @.omp.reduction.reduction_func.18, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP30]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP28:%.*]] = bitcast %struct.S* [[RHS_BEGIN]] to i8* +// CHECK1-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP30]], i32 1, i64 8, i8* [[TMP31]], void (i8*, i8*)* @.omp.reduction.reduction_func.18, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP32]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAYIDX]], [[TMP31]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAYIDX]], [[TMP33]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi %struct.S* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST8]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK1-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST8]] to i8* -// CHECK1-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[CALL]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i64 4, i1 false) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST7:%.*]] = phi %struct.S* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST7]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST7]] to i8* +// CHECK1-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[CALL]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i64 4, i1 false) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST7]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE10:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP31]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done11: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP28]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP33]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done10: +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY12:%.*]] = icmp eq %struct.S* [[ARRAYIDX]], [[TMP34]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY12]], label [[OMP_ARRAYCPY_DONE20:%.*]], label [[OMP_ARRAYCPY_BODY13:%.*]] -// CHECK1: omp.arraycpy.body13: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST14:%.*]] = phi %struct.S* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY13]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi %struct.S* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY13]] ] -// CHECK1-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP36]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL16:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST15]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST14]]) -// CHECK1-NEXT: [[TMP37:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST15]] to i8* -// CHECK1-NEXT: [[TMP38:%.*]] = bitcast %struct.S* [[CALL16]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP37]], i8* align 4 [[TMP38]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP36]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT18]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST14]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE19:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP34]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_BODY13]] -// CHECK1: omp.arraycpy.done20: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP28]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq %struct.S* [[ARRAYIDX]], [[TMP36]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY11]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY12:%.*]] +// CHECK1: omp.arraycpy.body12: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi %struct.S* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY12]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi %struct.S* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY12]] ] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP38]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL15:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST14]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST13]]) +// CHECK1-NEXT: [[TMP39:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST14]] to i8* +// CHECK1-NEXT: [[TMP40:%.*]] = bitcast %struct.S* [[CALL15]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP39]], i8* align 4 [[TMP40]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP38]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT17]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP36]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY12]] +// CHECK1: omp.arraycpy.done19: +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN21:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[VAR34]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN21]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN20:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[VAR3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN20]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP39]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP41]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN21]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE22:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done22: -// CHECK1-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP41]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN20]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done21: +// CHECK1-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP43]]) // CHECK1-NEXT: ret void // // @@ -2949,11 +3022,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [4 x %struct.S]* noundef nonnull align 4 dereferenceable(16) [[VAR3:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VAR3_ADDR:%.*]] = alloca [4 x %struct.S]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca [4 x %struct.S]*, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca [4 x %struct.S]*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2962,155 +3035,157 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VAR34:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca [4 x %struct.S]*, align 8 +// CHECK1-NEXT: [[VAR3:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca [4 x %struct.S]*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [4 x %struct.S]* [[VAR3]], [4 x %struct.S]** [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: store [4 x %struct.S]* [[TMP0]], [4 x %struct.S]** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[TMP]], align 8 -// CHECK1-NEXT: store [4 x %struct.S]* [[TMP1]], [4 x %struct.S]** [[_TMP1]], align 8 +// CHECK1-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[TMP1]], align 8 +// CHECK1-NEXT: store [4 x %struct.S]* [[TMP2]], [4 x %struct.S]** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[TMP]], align 8 +// CHECK1-NEXT: store [4 x %struct.S]* [[TMP3]], [4 x %struct.S]** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[_TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x %struct.S], [4 x %struct.S]* [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP3:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[_TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x %struct.S], [4 x %struct.S]* [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[VAR34]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP4]] +// CHECK1-NEXT: [[TMP4:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[_TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x %struct.S], [4 x %struct.S]* [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[_TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x %struct.S], [4 x %struct.S]* [[TMP5]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[VAR3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP5:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast [4 x %struct.S]* [[TMP5]] to %struct.S* -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint %struct.S* [[TMP6]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint %struct.S* [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (%struct.S* getelementptr ([[STRUCT_S]], %struct.S* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [2 x %struct.S]* [[VAR34]] to %struct.S* -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[TMP11]], i64 [[TMP10]] -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[TMP12]] to [4 x %struct.S]* -// CHECK1-NEXT: store [4 x %struct.S]* [[TMP13]], [4 x %struct.S]** [[_TMP5]], align 8 -// CHECK1-NEXT: [[RHS_BEGIN:%.*]] = bitcast [2 x %struct.S]* [[VAR34]] to %struct.S* -// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 9 +// CHECK1-NEXT: [[TMP7:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast [4 x %struct.S]* [[TMP7]] to %struct.S* +// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint %struct.S* [[TMP8]] to i64 +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint %struct.S* [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP12:%.*]] = sdiv exact i64 [[TMP11]], ptrtoint (%struct.S* getelementptr ([[STRUCT_S]], %struct.S* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S]* [[VAR3]] to %struct.S* +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[TMP13]], i64 [[TMP12]] +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast %struct.S* [[TMP14]] to [4 x %struct.S]* +// CHECK1-NEXT: store [4 x %struct.S]* [[TMP15]], [4 x %struct.S]** [[_TMP4]], align 8 +// CHECK1-NEXT: [[RHS_BEGIN:%.*]] = bitcast [2 x %struct.S]* [[VAR3]] to %struct.S* +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP26:%.*]] = bitcast %struct.S* [[RHS_BEGIN]] to i8* -// CHECK1-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, i8* [[TMP29]], void (i8*, i8*)* @.omp.reduction.reduction_func.20, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP30]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP28:%.*]] = bitcast %struct.S* [[RHS_BEGIN]] to i8* +// CHECK1-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP30]], i32 1, i64 8, i8* [[TMP31]], void (i8*, i8*)* @.omp.reduction.reduction_func.20, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP32]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAYIDX]], [[TMP31]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAYIDX]], [[TMP33]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi %struct.S* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST8]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK1-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST8]] to i8* -// CHECK1-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[CALL]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i64 4, i1 false) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST7:%.*]] = phi %struct.S* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST7]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST7]] to i8* +// CHECK1-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[CALL]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i64 4, i1 false) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST7]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE10:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP31]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done11: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP28]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP33]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done10: +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY12:%.*]] = icmp eq %struct.S* [[ARRAYIDX]], [[TMP34]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY12]], label [[OMP_ARRAYCPY_DONE20:%.*]], label [[OMP_ARRAYCPY_BODY13:%.*]] -// CHECK1: omp.arraycpy.body13: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST14:%.*]] = phi %struct.S* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY13]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi %struct.S* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY13]] ] -// CHECK1-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP36]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL16:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST15]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST14]]) -// CHECK1-NEXT: [[TMP37:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST15]] to i8* -// CHECK1-NEXT: [[TMP38:%.*]] = bitcast %struct.S* [[CALL16]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP37]], i8* align 4 [[TMP38]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP36]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT18]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST14]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE19:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP34]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_BODY13]] -// CHECK1: omp.arraycpy.done20: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP28]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq %struct.S* [[ARRAYIDX]], [[TMP36]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY11]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY12:%.*]] +// CHECK1: omp.arraycpy.body12: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi %struct.S* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY12]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi %struct.S* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY12]] ] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP38]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL15:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST14]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST13]]) +// CHECK1-NEXT: [[TMP39:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST14]] to i8* +// CHECK1-NEXT: [[TMP40:%.*]] = bitcast %struct.S* [[CALL15]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP39]], i8* align 4 [[TMP40]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP38]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT17]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP36]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY12]] +// CHECK1: omp.arraycpy.done19: +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN21:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[VAR34]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN21]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN20:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[VAR3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN20]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP39]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP41]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN21]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE22:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done22: -// CHECK1-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP41]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN20]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done21: +// CHECK1-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP43]]) // CHECK1-NEXT: ret void // // @@ -3150,11 +3225,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..21 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [4 x %struct.S]* noundef nonnull align 4 dereferenceable(16) [[VAR3:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VAR3_ADDR:%.*]] = alloca [4 x %struct.S]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca [4 x %struct.S]*, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca [4 x %struct.S]*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -3170,161 +3245,163 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [4 x %struct.S]* [[VAR3]], [4 x %struct.S]** [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: store [4 x %struct.S]* [[TMP0]], [4 x %struct.S]** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[TMP]], align 8 -// CHECK1-NEXT: store [4 x %struct.S]* [[TMP1]], [4 x %struct.S]** [[_TMP1]], align 8 +// CHECK1-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[TMP1]], align 8 +// CHECK1-NEXT: store [4 x %struct.S]* [[TMP2]], [4 x %struct.S]** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[TMP]], align 8 +// CHECK1-NEXT: store [4 x %struct.S]* [[TMP3]], [4 x %struct.S]** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[_TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x %struct.S], [4 x %struct.S]* [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[_TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x %struct.S], [4 x %struct.S]* [[TMP3]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP4:%.*]] = ptrtoint %struct.S* [[ARRAYIDX3]] to i64 -// CHECK1-NEXT: [[TMP5:%.*]] = ptrtoint %struct.S* [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]] -// CHECK1-NEXT: [[TMP7:%.*]] = sdiv exact i64 [[TMP6]], ptrtoint (%struct.S* getelementptr ([[STRUCT_S:%.*]], %struct.S* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP8:%.*]] = add nuw i64 [[TMP7]], 1 -// CHECK1-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], ptrtoint (%struct.S* getelementptr ([[STRUCT_S]], %struct.S* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave() -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca [[STRUCT_S]], i64 [[TMP8]], align 16 -// CHECK1-NEXT: store i64 [[TMP8]], i64* [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[VLA]], i64 [[TMP8]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[VLA]], [[TMP11]] +// CHECK1-NEXT: [[TMP4:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[_TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x %struct.S], [4 x %struct.S]* [[TMP4]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[_TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x %struct.S], [4 x %struct.S]* [[TMP5]], i64 0, i64 3 +// CHECK1-NEXT: [[TMP6:%.*]] = ptrtoint %struct.S* [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint %struct.S* [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP8:%.*]] = sub i64 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP9:%.*]] = sdiv exact i64 [[TMP8]], ptrtoint (%struct.S* getelementptr ([[STRUCT_S:%.*]], %struct.S* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP10:%.*]] = add nuw i64 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], ptrtoint (%struct.S* getelementptr ([[STRUCT_S]], %struct.S* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP12:%.*]] = call i8* @llvm.stacksave() +// CHECK1-NEXT: store i8* [[TMP12]], i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca [[STRUCT_S]], i64 [[TMP10]], align 16 +// CHECK1-NEXT: store i64 [[TMP10]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[VLA]], i64 [[TMP10]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[VLA]], [[TMP13]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP13]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP12:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast [4 x %struct.S]* [[TMP12]] to %struct.S* -// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint %struct.S* [[TMP13]] to i64 -// CHECK1-NEXT: [[TMP15:%.*]] = ptrtoint %struct.S* [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = sub i64 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: [[TMP17:%.*]] = sdiv exact i64 [[TMP16]], ptrtoint (%struct.S* getelementptr ([[STRUCT_S]], %struct.S* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[VLA]], i64 [[TMP17]] -// CHECK1-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[TMP18]] to [4 x %struct.S]* -// CHECK1-NEXT: store [4 x %struct.S]* [[TMP19]], [4 x %struct.S]** [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP22]], 9 +// CHECK1-NEXT: [[TMP14:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast [4 x %struct.S]* [[TMP14]] to %struct.S* +// CHECK1-NEXT: [[TMP16:%.*]] = ptrtoint %struct.S* [[TMP15]] to i64 +// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint %struct.S* [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP18:%.*]] = sub i64 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: [[TMP19:%.*]] = sdiv exact i64 [[TMP18]], ptrtoint (%struct.S* getelementptr ([[STRUCT_S]], %struct.S* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[VLA]], i64 [[TMP19]] +// CHECK1-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[TMP20]] to [4 x %struct.S]* +// CHECK1-NEXT: store [4 x %struct.S]* [[TMP21]], [4 x %struct.S]** [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[VLA]] to i8* -// CHECK1-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP34:%.*]] = inttoptr i64 [[TMP8]] to i8* +// CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[VLA]] to i8* // CHECK1-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP38:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP36]], i32 1, i64 16, i8* [[TMP37]], void (i8*, i8*)* @.omp.reduction.reduction_func.22, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP38]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP36:%.*]] = inttoptr i64 [[TMP10]] to i8* +// CHECK1-NEXT: store i8* [[TMP36]], i8** [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP40:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP38]], i32 1, i64 16, i8* [[TMP39]], void (i8*, i8*)* @.omp.reduction.reduction_func.22, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP40]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX]], i64 [[TMP8]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAYIDX]], [[TMP39]] +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX]], i64 [[TMP10]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAYIDX]], [[TMP41]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST7:%.*]] = phi %struct.S* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST7]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK1-NEXT: [[TMP40:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST7]] to i8* -// CHECK1-NEXT: [[TMP41:%.*]] = bitcast %struct.S* [[CALL]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP40]], i8* align 4 [[TMP41]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP42:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST7]] to i8* +// CHECK1-NEXT: [[TMP43:%.*]] = bitcast %struct.S* [[CALL]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP42]], i8* align 4 [[TMP43]], i64 4, i1 false) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST7]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP39]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP41]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done10: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP36]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP38]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX]], i64 [[TMP8]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq %struct.S* [[ARRAYIDX]], [[TMP42]] +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX]], i64 [[TMP10]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq %struct.S* [[ARRAYIDX]], [[TMP44]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY11]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY12:%.*]] // CHECK1: omp.arraycpy.body12: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi %struct.S* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY12]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi %struct.S* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY12]] ] -// CHECK1-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP44]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP46]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) // CHECK1-NEXT: [[CALL15:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST14]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST13]]) -// CHECK1-NEXT: [[TMP45:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST14]] to i8* -// CHECK1-NEXT: [[TMP46:%.*]] = bitcast %struct.S* [[CALL15]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP45]], i8* align 4 [[TMP46]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP44]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP47:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST14]] to i8* +// CHECK1-NEXT: [[TMP48:%.*]] = bitcast %struct.S* [[CALL15]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP47]], i8* align 4 [[TMP48]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP46]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT17]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP42]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP44]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY12]] // CHECK1: omp.arraycpy.done19: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP36]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP38]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[VLA]], i64 [[TMP8]] -// CHECK1-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %struct.S* [[VLA]], [[TMP47]] +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[VLA]], i64 [[TMP10]] +// CHECK1-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %struct.S* [[VLA]], [[TMP49]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE20:%.*]], label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP47]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP49]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[VLA]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE20]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done20: -// CHECK1-NEXT: [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP48]]) -// CHECK1-NEXT: [[TMP49:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[TMP49]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP50]]) +// CHECK1-NEXT: [[TMP50:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP50]]) +// CHECK1-NEXT: [[TMP51:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[TMP51]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP52]]) // CHECK1-NEXT: ret void // // @@ -3367,11 +3444,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [4 x %struct.S]* noundef nonnull align 4 dereferenceable(16) [[VAR3:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VAR3_ADDR:%.*]] = alloca [4 x %struct.S]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca [4 x %struct.S]*, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca [4 x %struct.S]*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -3380,144 +3457,146 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VAR33:%.*]] = alloca [4 x %struct.S], align 16 -// CHECK1-NEXT: [[_TMP4:%.*]] = alloca [4 x %struct.S]*, align 8 +// CHECK1-NEXT: [[VAR3:%.*]] = alloca [4 x %struct.S], align 16 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca [4 x %struct.S]*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [4 x %struct.S]* [[VAR3]], [4 x %struct.S]** [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: store [4 x %struct.S]* [[TMP0]], [4 x %struct.S]** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[TMP]], align 8 -// CHECK1-NEXT: store [4 x %struct.S]* [[TMP1]], [4 x %struct.S]** [[_TMP1]], align 8 +// CHECK1-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], %struct.anon.11* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[TMP1]], align 8 +// CHECK1-NEXT: store [4 x %struct.S]* [[TMP2]], [4 x %struct.S]** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[TMP]], align 8 +// CHECK1-NEXT: store [4 x %struct.S]* [[TMP3]], [4 x %struct.S]** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[_TMP1]], align 8 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [4 x %struct.S], [4 x %struct.S]* [[VAR33]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 4 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP3]] +// CHECK1-NEXT: [[TMP4:%.*]] = load [4 x %struct.S]*, [4 x %struct.S]** [[_TMP1]], align 8 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [4 x %struct.S], [4 x %struct.S]* [[VAR3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 4 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: store [4 x %struct.S]* [[VAR33]], [4 x %struct.S]** [[_TMP4]], align 8 -// CHECK1-NEXT: [[LHS_BEGIN:%.*]] = bitcast [4 x %struct.S]* [[TMP2]] to %struct.S* -// CHECK1-NEXT: [[RHS_BEGIN:%.*]] = bitcast [4 x %struct.S]* [[VAR33]] to %struct.S* -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK1-NEXT: store [4 x %struct.S]* [[VAR3]], [4 x %struct.S]** [[_TMP3]], align 8 +// CHECK1-NEXT: [[LHS_BEGIN:%.*]] = bitcast [4 x %struct.S]* [[TMP4]] to %struct.S* +// CHECK1-NEXT: [[RHS_BEGIN:%.*]] = bitcast [4 x %struct.S]* [[VAR3]] to %struct.S* +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]]) -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast %struct.S* [[RHS_BEGIN]] to i8* -// CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], i32 1, i64 8, i8* [[TMP19]], void (i8*, i8*)* @.omp.reduction.reduction_func.24, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[RHS_BEGIN]] to i8* +// CHECK1-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func.24, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[LHS_BEGIN]], i64 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[LHS_BEGIN]], [[TMP21]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[LHS_BEGIN]], i64 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[LHS_BEGIN]], [[TMP23]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST7:%.*]] = phi %struct.S* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST7]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK1-NEXT: [[TMP22:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST7]] to i8* -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast %struct.S* [[CALL]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST7]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST6:%.*]] = phi %struct.S* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT7:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST6]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK1-NEXT: [[TMP24:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST6]] to i8* +// CHECK1-NEXT: [[TMP25:%.*]] = bitcast %struct.S* [[CALL]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP24]], i8* align 4 [[TMP25]], i64 4, i1 false) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT7]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST6]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP21]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done10: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE8:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT7]], [[TMP23]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done9: +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[LHS_BEGIN]], i64 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq %struct.S* [[LHS_BEGIN]], [[TMP24]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY11]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY12:%.*]] -// CHECK1: omp.arraycpy.body12: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi %struct.S* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY12]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi %struct.S* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY12]] ] -// CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP26]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL15:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST14]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST13]]) -// CHECK1-NEXT: [[TMP27:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST14]] to i8* -// CHECK1-NEXT: [[TMP28:%.*]] = bitcast %struct.S* [[CALL15]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP27]], i8* align 4 [[TMP28]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP26]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT17]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP24]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY12]] -// CHECK1: omp.arraycpy.done19: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[LHS_BEGIN]], i64 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY10:%.*]] = icmp eq %struct.S* [[LHS_BEGIN]], [[TMP26]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY10]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY11:%.*]] +// CHECK1: omp.arraycpy.body11: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST12:%.*]] = phi %struct.S* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY11]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST13:%.*]] = phi %struct.S* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY11]] ] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP28]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL14:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST13]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST12]]) +// CHECK1-NEXT: [[TMP29:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST13]] to i8* +// CHECK1-NEXT: [[TMP30:%.*]] = bitcast %struct.S* [[CALL14]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP29]], i8* align 4 [[TMP30]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP28]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST13]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT16]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST12]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP26]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY11]] +// CHECK1: omp.arraycpy.done18: +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN20:%.*]] = getelementptr inbounds [4 x %struct.S], [4 x %struct.S]* [[VAR33]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN20]], i64 4 +// CHECK1-NEXT: [[ARRAY_BEGIN19:%.*]] = getelementptr inbounds [4 x %struct.S], [4 x %struct.S]* [[VAR3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN19]], i64 4 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP29]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP31]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN20]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done21: -// CHECK1-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP31]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN19]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE20:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done20: +// CHECK1-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP33]]) // CHECK1-NEXT: ret void // // @@ -3561,65 +3640,98 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[T:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_12:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S_0]], align 4 -// CHECK1-NEXT: [[ARR:%.*]] = alloca [42 x %struct.S.0], align 16 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.12], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca %struct.S.12*, align 8 +// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S_12]], align 4 +// CHECK1-NEXT: [[ARR:%.*]] = alloca [42 x %struct.S.12], align 16 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiLi42EET_v.vec to i8*), i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) -// CHECK1-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [42 x %struct.S.0], [42 x %struct.S.0]* [[ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 42 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.12], [2 x %struct.S.12]* [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_12]], %struct.S.12* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK1-NEXT: store %struct.S.12* [[TEST]], %struct.S.12** [[VAR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [42 x %struct.S.12], [42 x %struct.S.12]* [[ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_12]], %struct.S.12* [[ARRAY_BEGIN]], i64 42 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: -// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.12* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_12]], %struct.S.12* [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.12* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.S.0*, %struct.S.0*, i32*, [2 x i32]*, [2 x %struct.S.0]*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), i32* [[T_VAR]], %struct.S.0* [[TMP1]], %struct.S.0* [[VAR1]], i32* [[T_VAR1]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32* [[T_VAR]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP2]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [42 x %struct.S.0]*, [2 x i32]*, i32*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), [42 x %struct.S.0]* [[ARR]], [2 x i32]* [[VEC]], i32* [[T_VAR]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP3]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S.12*, %struct.S.12** [[VAR]], align 8 +// CHECK1-NEXT: store %struct.S.12* [[TMP3]], %struct.S.12** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.S.12* [[VAR1]], %struct.S.12** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i32* [[T_VAR1]], i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store [2 x %struct.S.12]* [[S_ARR]], [2 x %struct.S.12]** [[TMP7]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.13*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 1 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x %struct.S.12]* [[S_ARR]], [2 x %struct.S.12]** [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.S.12*, %struct.S.12** [[VAR]], align 8 +// CHECK1-NEXT: store %struct.S.12* [[TMP12]], %struct.S.12** [[TMP11]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..27 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_1]]) +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 0 +// CHECK1-NEXT: store [42 x %struct.S.12]* [[ARR]], [42 x %struct.S.12]** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 1 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 3 +// CHECK1-NEXT: store [2 x %struct.S.12]* [[S_ARR]], [2 x %struct.S.12]** [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load %struct.S.12*, %struct.S.12** [[VAR]], align 8 +// CHECK1-NEXT: store %struct.S.12* [[TMP18]], %struct.S.12** [[TMP17]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.15*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_2]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN1:%.*]] = getelementptr inbounds [42 x %struct.S.0], [42 x %struct.S.0]* [[ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN1]], i64 42 +// CHECK1-NEXT: [[ARRAY_BEGIN3:%.*]] = getelementptr inbounds [42 x %struct.S.12], [42 x %struct.S.12]* [[ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_12]], %struct.S.12* [[ARRAY_BEGIN3]], i64 42 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP4]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN1]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done2: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAY_BEGIN3:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN3]], i64 2 -// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY4:%.*]] -// CHECK1: arraydestroy.body4: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST5:%.*]] = phi %struct.S.0* [ [[TMP5]], [[ARRAYDESTROY_DONE2]] ], [ [[ARRAYDESTROY_ELEMENT6:%.*]], [[ARRAYDESTROY_BODY4]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT6]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST5]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT6]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE7:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT6]], [[ARRAY_BEGIN3]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE7]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY4]] -// CHECK1: arraydestroy.done8: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP6]] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.12* [ [[TMP19]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_12]], %struct.S.12* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.12* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN3]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done4: +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR5]] +// CHECK1-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S.12], [2 x %struct.S.12]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S_12]], %struct.S.12* [[ARRAY_BEGIN5]], i64 2 +// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY6:%.*]] +// CHECK1: arraydestroy.body6: +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST7:%.*]] = phi %struct.S.12* [ [[TMP20]], [[ARRAYDESTROY_DONE4]] ], [ [[ARRAYDESTROY_ELEMENT8:%.*]], [[ARRAYDESTROY_BODY6]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT8]] = getelementptr inbounds [[STRUCT_S_12]], %struct.S.12* [[ARRAYDESTROY_ELEMENTPAST7]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT8]]) #[[ATTR5]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE9:%.*]] = icmp eq %struct.S.12* [[ARRAYDESTROY_ELEMENT8]], [[ARRAY_BEGIN5]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE9]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY6]] +// CHECK1: arraydestroy.done10: +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP21]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev @@ -3663,225 +3775,222 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK1-SAME: (%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.12*, align 8 +// CHECK1-NEXT: store %struct.S.12* [[THIS]], %struct.S.12** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.12*, %struct.S.12** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK1-SAME: (%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.12*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.12* [[THIS]], %struct.S.12** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.12*, %struct.S.12** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..25 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR1:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[VAR1_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[T_VAR1_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 8 +// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.12*, align 8 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.12*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[VAR16:%.*]] = alloca [[STRUCT_S_0]], align 4 -// CHECK1-NEXT: [[T_VAR17:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_12:%.*]], align 4 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca %struct.S.12*, align 8 +// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S_12]], align 4 +// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x i8*], align 8 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_0]], align 4 -// CHECK1-NEXT: [[REF_TMP22:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_12]], align 4 +// CHECK1-NEXT: [[REF_TMP18:%.*]] = alloca [[STRUCT_S_12]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[VAR1]], %struct.S.0** [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR1]], i32** [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP1]], %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP6]], %struct.S.0** [[_TMP1]], align 8 +// CHECK1-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load %struct.S.12*, %struct.S.12** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.S.12*, %struct.S.12** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load [2 x %struct.S.12]*, [2 x %struct.S.12]** [[TMP11]], align 8 +// CHECK1-NEXT: store %struct.S.12* [[TMP4]], %struct.S.12** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load %struct.S.12*, %struct.S.12** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.S.12* [[TMP13]], %struct.S.12** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP1]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: store %struct.S.0* [[VAR4]], %struct.S.0** [[_TMP5]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR16]]) -// CHECK1-NEXT: store i32 2147483647, i32* [[T_VAR17]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load %struct.S.12*, %struct.S.12** [[_TMP1]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: store %struct.S.12* [[VAR]], %struct.S.12** [[_TMP3]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: store i32 2147483647, i32* [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP4]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP16]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP5]], i64 0, i64 [[IDXPROM9]] -// CHECK1-NEXT: [[TMP20:%.*]] = bitcast %struct.S.0* [[ARRAYIDX10]] to i8* -// CHECK1-NEXT: [[TMP21:%.*]] = bitcast %struct.S.0* [[TMP18]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP10]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP23]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load %struct.S.12*, %struct.S.12** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.12], [2 x %struct.S.12]* [[TMP12]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: [[TMP27:%.*]] = bitcast %struct.S.12* [[ARRAYIDX6]] to i8* +// CHECK1-NEXT: [[TMP28:%.*]] = bitcast %struct.S.12* [[TMP25]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP27]], i8* align 4 [[TMP28]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]]) -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP24:%.*]] = bitcast i32* [[T_VAR3]] to i8* -// CHECK1-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP26:%.*]] = bitcast %struct.S.0* [[VAR4]] to i8* -// CHECK1-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP28:%.*]] = bitcast %struct.S.0* [[VAR16]] to i8* -// CHECK1-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP30:%.*]] = bitcast i32* [[T_VAR17]] to i8* -// CHECK1-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = bitcast [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 4, i64 32, i8* [[TMP31]], void (i8*, i8*)* @.omp.reduction.reduction_func.26, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP32]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP31:%.*]] = bitcast i32* [[T_VAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast %struct.S.12* [[VAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP33]], i8** [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP35:%.*]] = bitcast %struct.S.12* [[VAR1]] to i8* +// CHECK1-NEXT: store i8* [[TMP35]], i8** [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 +// CHECK1-NEXT: [[TMP37:%.*]] = bitcast i32* [[T_VAR1]] to i8* +// CHECK1-NEXT: store i8* [[TMP37]], i8** [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = bitcast [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP16]], i32 4, i64 32, i8* [[TMP38]], void (i8*, i8*)* @.omp.reduction.reduction_func.26, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP39]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEanERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: [[TMP35:%.*]] = bitcast %struct.S.0* [[TMP7]] to i8* -// CHECK1-NEXT: [[TMP36:%.*]] = bitcast %struct.S.0* [[CALL]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i64 4, i1 false) -// CHECK1-NEXT: [[CALL13:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP2]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[CALL13]], 0 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.12* @_ZN1SIiEanERKS0_(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[TMP14]], %struct.S.12* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: [[TMP42:%.*]] = bitcast %struct.S.12* [[TMP14]] to i8* +// CHECK1-NEXT: [[TMP43:%.*]] = bitcast %struct.S.12* [[CALL]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP42]], i8* align 4 [[TMP43]], i64 4, i1 false) +// CHECK1-NEXT: [[CALL9:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[TMP6]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[CALL9]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] // CHECK1: land.rhs: -// CHECK1-NEXT: [[CALL14:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR16]]) -// CHECK1-NEXT: [[TOBOOL15:%.*]] = icmp ne i32 [[CALL14]], 0 +// CHECK1-NEXT: [[CALL10:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL11:%.*]] = icmp ne i32 [[CALL10]], 0 // CHECK1-NEXT: br label [[LAND_END]] // CHECK1: land.end: -// CHECK1-NEXT: [[TMP37:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL15]], [[LAND_RHS]] ] -// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TMP37]] to i32 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], i32 noundef [[CONV]]) -// CHECK1-NEXT: [[TMP38:%.*]] = bitcast %struct.S.0* [[TMP2]] to i8* -// CHECK1-NEXT: [[TMP39:%.*]] = bitcast %struct.S.0* [[REF_TMP]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i64 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[T_VAR17]], align 4 -// CHECK1-NEXT: [[CMP16:%.*]] = icmp slt i32 [[TMP40]], [[TMP41]] -// CHECK1-NEXT: br i1 [[CMP16]], label [[COND_TRUE17:%.*]], label [[COND_FALSE18:%.*]] -// CHECK1: cond.true17: -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: br label [[COND_END19:%.*]] -// CHECK1: cond.false18: -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[T_VAR17]], align 4 -// CHECK1-NEXT: br label [[COND_END19]] -// CHECK1: cond.end19: -// CHECK1-NEXT: [[COND20:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE17]] ], [ [[TMP43]], [[COND_FALSE18]] ] -// CHECK1-NEXT: store i32 [[COND20]], i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP44:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL11]], [[LAND_RHS]] ] +// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TMP44]] to i32 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], i32 noundef [[CONV]]) +// CHECK1-NEXT: [[TMP45:%.*]] = bitcast %struct.S.12* [[TMP6]] to i8* +// CHECK1-NEXT: [[TMP46:%.*]] = bitcast %struct.S.12* [[REF_TMP]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP45]], i8* align 4 [[TMP46]], i64 4, i1 false) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR5]] +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[T_VAR1]], align 4 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp slt i32 [[TMP47]], [[TMP48]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[COND_TRUE13:%.*]], label [[COND_FALSE14:%.*]] +// CHECK1: cond.true13: +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK1-NEXT: br label [[COND_END15:%.*]] +// CHECK1: cond.false14: +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[T_VAR1]], align 4 +// CHECK1-NEXT: br label [[COND_END15]] +// CHECK1: cond.end15: +// CHECK1-NEXT: [[COND16:%.*]] = phi i32 [ [[TMP49]], [[COND_TRUE13]] ], [ [[TMP50]], [[COND_FALSE14]] ] +// CHECK1-NEXT: store i32 [[COND16]], i32* [[TMP8]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP16]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TMP45:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP44]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL21:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEanERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: [[TMP46:%.*]] = bitcast %struct.S.0* [[TMP7]] to i8* -// CHECK1-NEXT: [[TMP47:%.*]] = bitcast %struct.S.0* [[CALL21]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP46]], i8* align 4 [[TMP47]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL23:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP2]]) -// CHECK1-NEXT: [[TOBOOL24:%.*]] = icmp ne i32 [[CALL23]], 0 -// CHECK1-NEXT: br i1 [[TOBOOL24]], label [[LAND_RHS25:%.*]], label [[LAND_END28:%.*]] -// CHECK1: land.rhs25: -// CHECK1-NEXT: [[CALL26:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR16]]) -// CHECK1-NEXT: [[TOBOOL27:%.*]] = icmp ne i32 [[CALL26]], 0 -// CHECK1-NEXT: br label [[LAND_END28]] -// CHECK1: land.end28: -// CHECK1-NEXT: [[TMP48:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL27]], [[LAND_RHS25]] ] -// CHECK1-NEXT: [[CONV29:%.*]] = zext i1 [[TMP48]] to i32 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[REF_TMP22]], i32 noundef [[CONV29]]) -// CHECK1-NEXT: [[TMP49:%.*]] = bitcast %struct.S.0* [[TMP2]] to i8* -// CHECK1-NEXT: [[TMP50:%.*]] = bitcast %struct.S.0* [[REF_TMP22]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP49]], i8* align 4 [[TMP50]], i64 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[REF_TMP22]]) #[[ATTR5]] -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[TMP51:%.*]] = load i32, i32* [[T_VAR17]], align 4 -// CHECK1-NEXT: [[TMP52:%.*]] = atomicrmw min i32* [[TMP3]], i32 [[TMP51]] monotonic, align 4 +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP52:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP51]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP16]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL17:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.12* @_ZN1SIiEanERKS0_(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[TMP14]], %struct.S.12* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: [[TMP53:%.*]] = bitcast %struct.S.12* [[TMP14]] to i8* +// CHECK1-NEXT: [[TMP54:%.*]] = bitcast %struct.S.12* [[CALL17]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP53]], i8* align 4 [[TMP54]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP16]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP16]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL19:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[TMP6]]) +// CHECK1-NEXT: [[TOBOOL20:%.*]] = icmp ne i32 [[CALL19]], 0 +// CHECK1-NEXT: br i1 [[TOBOOL20]], label [[LAND_RHS21:%.*]], label [[LAND_END24:%.*]] +// CHECK1: land.rhs21: +// CHECK1-NEXT: [[CALL22:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL23:%.*]] = icmp ne i32 [[CALL22]], 0 +// CHECK1-NEXT: br label [[LAND_END24]] +// CHECK1: land.end24: +// CHECK1-NEXT: [[TMP55:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL23]], [[LAND_RHS21]] ] +// CHECK1-NEXT: [[CONV25:%.*]] = zext i1 [[TMP55]] to i32 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[REF_TMP18]], i32 noundef [[CONV25]]) +// CHECK1-NEXT: [[TMP56:%.*]] = bitcast %struct.S.12* [[TMP6]] to i8* +// CHECK1-NEXT: [[TMP57:%.*]] = bitcast %struct.S.12* [[REF_TMP18]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP56]], i8* align 4 [[TMP57]], i64 4, i1 false) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[REF_TMP18]]) #[[ATTR5]] +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP16]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP58:%.*]] = load i32, i32* [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP59:%.*]] = atomicrmw min i32* [[TMP8]], i32 [[TMP58]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR16]]) #[[ATTR5]] -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR5]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] // CHECK1-NEXT: ret void // // @@ -3890,7 +3999,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_12:%.*]], align 4 // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8 @@ -3905,16 +4014,16 @@ // CHECK1-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32* // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[TMP5]], i64 0, i64 1 // CHECK1-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to %struct.S.0* +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to %struct.S.12* // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[TMP3]], i64 0, i64 1 // CHECK1-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to %struct.S.0* +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to %struct.S.12* // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[TMP5]], i64 0, i64 2 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to %struct.S.0* +// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to %struct.S.12* // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[TMP3]], i64 0, i64 2 // CHECK1-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i8* [[TMP22]] to %struct.S.0* +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i8* [[TMP22]] to %struct.S.12* // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[TMP5]], i64 0, i64 3 // CHECK1-NEXT: [[TMP25:%.*]] = load i8*, i8** [[TMP24]], align 8 // CHECK1-NEXT: [[TMP26:%.*]] = bitcast i8* [[TMP25]] to i32* @@ -3925,25 +4034,25 @@ // CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP11]], align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEanERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP17]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP14]]) -// CHECK1-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[TMP17]] to i8* -// CHECK1-NEXT: [[TMP33:%.*]] = bitcast %struct.S.0* [[CALL]] to i8* +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.12* @_ZN1SIiEanERKS0_(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[TMP17]], %struct.S.12* noundef nonnull align 4 dereferenceable(4) [[TMP14]]) +// CHECK1-NEXT: [[TMP32:%.*]] = bitcast %struct.S.12* [[TMP17]] to i8* +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast %struct.S.12* [[CALL]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i64 4, i1 false) -// CHECK1-NEXT: [[CALL2:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP23]]) +// CHECK1-NEXT: [[CALL2:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[TMP23]]) // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[CALL2]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] // CHECK1: land.rhs: -// CHECK1-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP20]]) +// CHECK1-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[TMP20]]) // CHECK1-NEXT: [[TOBOOL4:%.*]] = icmp ne i32 [[CALL3]], 0 // CHECK1-NEXT: br label [[LAND_END]] // CHECK1: land.end: // CHECK1-NEXT: [[TMP34:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[TOBOOL4]], [[LAND_RHS]] ] // CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TMP34]] to i32 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], i32 noundef [[CONV]]) -// CHECK1-NEXT: [[TMP35:%.*]] = bitcast %struct.S.0* [[TMP23]] to i8* -// CHECK1-NEXT: [[TMP36:%.*]] = bitcast %struct.S.0* [[REF_TMP]] to i8* +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], i32 noundef [[CONV]]) +// CHECK1-NEXT: [[TMP35:%.*]] = bitcast %struct.S.12* [[TMP23]] to i8* +// CHECK1-NEXT: [[TMP36:%.*]] = bitcast %struct.S.12* [[REF_TMP]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i64 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR5]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP29]], align 4 // CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP26]], align 4 // CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP37]], [[TMP38]] @@ -3961,178 +4070,177 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEanERKS0_ -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR7]] align 2 { +// CHECK1-SAME: (%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.12* noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR7]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP0]], %struct.S.0** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: ret %struct.S.0* [[THIS1]] +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.12*, align 8 +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca %struct.S.12*, align 8 +// CHECK1-NEXT: store %struct.S.12* [[THIS]], %struct.S.12** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.12* [[TMP0]], %struct.S.12** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.12*, %struct.S.12** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: ret %struct.S.12* [[THIS1]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEcviEv -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) #[[ATTR7]] align 2 { +// CHECK1-SAME: (%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) #[[ATTR7]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.12*, align 8 +// CHECK1-NEXT: store %struct.S.12* [[THIS]], %struct.S.12** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.12*, %struct.S.12** [[THIS_ADDR]], align 8 // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK1-SAME: (%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR5]] +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.12*, align 8 +// CHECK1-NEXT: store %struct.S.12* [[THIS]], %struct.S.12** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.12*, %struct.S.12** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR5]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 8 +// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.12*, align 8 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.12*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP10:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP9:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[_TMP1]], align 8 +// CHECK1-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.12]*, [2 x %struct.S.12]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S.12*, %struct.S.12** [[TMP7]], align 8 +// CHECK1-NEXT: store %struct.S.12* [[TMP8]], %struct.S.12** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load %struct.S.12*, %struct.S.12** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.S.12* [[TMP9]], %struct.S.12** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 1, i32* [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 1, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP1]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP13]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP2]], i64 0, i64 [[IDXPROM5]] -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast %struct.S.0* [[ARRAYIDX6]] to i8* -// CHECK1-NEXT: [[TMP18:%.*]] = bitcast %struct.S.0* [[TMP15]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP17]], i8* align 4 [[TMP18]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP4]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.S.12*, %struct.S.12** [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.12], [2 x %struct.S.12]* [[TMP6]], i64 0, i64 [[IDXPROM4]] +// CHECK1-NEXT: [[TMP22:%.*]] = bitcast %struct.S.12* [[ARRAYIDX5]] to i8* +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast %struct.S.12* [[TMP20]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[T_VAR3]] to i8* -// CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 1, i64 8, i8* [[TMP22]], void (i8*, i8*)* @.omp.reduction.reduction_func.28, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i32* [[T_VAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 1, i64 8, i8* [[TMP27]], void (i8*, i8*)* @.omp.reduction.reduction_func.28, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP28]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP29]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] // CHECK1: land.rhs: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TOBOOL8:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TOBOOL7:%.*]] = icmp ne i32 [[TMP30]], 0 // CHECK1-NEXT: br label [[LAND_END]] // CHECK1: land.end: -// CHECK1-NEXT: [[TMP26:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL8]], [[LAND_RHS]] ] -// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TMP26]] to i32 -// CHECK1-NEXT: store i32 [[CONV]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP31:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL7]], [[LAND_RHS]] ] +// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TMP31]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TOBOOL9:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP0]] monotonic, align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TOBOOL8:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP2]] monotonic, align 4 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP28:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP34:%.*]], [[LAND_END14:%.*]] ] -// CHECK1-NEXT: store i32 [[TMP28]], i32* [[_TMP10]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[_TMP10]], align 4 -// CHECK1-NEXT: [[TOBOOL11:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK1-NEXT: br i1 [[TOBOOL11]], label [[LAND_RHS12:%.*]], label [[LAND_END14]] -// CHECK1: land.rhs12: -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TOBOOL13:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK1-NEXT: br label [[LAND_END14]] -// CHECK1: land.end14: -// CHECK1-NEXT: [[TMP31:%.*]] = phi i1 [ false, [[ATOMIC_CONT]] ], [ [[TOBOOL13]], [[LAND_RHS12]] ] -// CHECK1-NEXT: [[CONV15:%.*]] = zext i1 [[TMP31]] to i32 -// CHECK1-NEXT: store i32 [[CONV15]], i32* [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = cmpxchg i32* [[TMP0]], i32 [[TMP28]], i32 [[TMP32]] monotonic monotonic, align 4 -// CHECK1-NEXT: [[TMP34]] = extractvalue { i32, i1 } [[TMP33]], 0 -// CHECK1-NEXT: [[TMP35:%.*]] = extractvalue { i32, i1 } [[TMP33]], 1 -// CHECK1-NEXT: br i1 [[TMP35]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP33:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP39:%.*]], [[LAND_END13:%.*]] ] +// CHECK1-NEXT: store i32 [[TMP33]], i32* [[_TMP9]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[_TMP9]], align 4 +// CHECK1-NEXT: [[TOBOOL10:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK1-NEXT: br i1 [[TOBOOL10]], label [[LAND_RHS11:%.*]], label [[LAND_END13]] +// CHECK1: land.rhs11: +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TOBOOL12:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK1-NEXT: br label [[LAND_END13]] +// CHECK1: land.end13: +// CHECK1-NEXT: [[TMP36:%.*]] = phi i1 [ false, [[ATOMIC_CONT]] ], [ [[TOBOOL12]], [[LAND_RHS11]] ] +// CHECK1-NEXT: [[CONV14:%.*]] = zext i1 [[TMP36]] to i32 +// CHECK1-NEXT: store i32 [[CONV14]], i32* [[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP33]], i32 [[TMP37]] monotonic monotonic, align 4 +// CHECK1-NEXT: [[TMP39]] = extractvalue { i32, i1 } [[TMP38]], 0 +// CHECK1-NEXT: [[TMP40:%.*]] = extractvalue { i32, i1 } [[TMP38]], 1 +// CHECK1-NEXT: br i1 [[TMP40]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP6]]) +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP11]]) // CHECK1-NEXT: ret void // // @@ -4168,197 +4276,195 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..29 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [42 x %struct.S.0]* noundef nonnull align 4 dereferenceable(168) [[ARR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.15* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARR_ADDR:%.*]] = alloca [42 x %struct.S.0]*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.15*, align 8 +// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.12*, align 8 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.12*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARR4:%.*]] = alloca [40 x %struct.S.0], align 16 +// CHECK1-NEXT: [[ARR:%.*]] = alloca [40 x %struct.S.12], align 16 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[REF_TMP20:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_12:%.*]], align 4 +// CHECK1-NEXT: [[REF_TMP19:%.*]] = alloca [[STRUCT_S_12]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [42 x %struct.S.0]* [[ARR]], [42 x %struct.S.0]** [[ARR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [42 x %struct.S.0]*, [42 x %struct.S.0]** [[ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP5]], %struct.S.0** [[_TMP1]], align 8 +// CHECK1-NEXT: store %struct.anon.15* [[__CONTEXT]], %struct.anon.15** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.15*, %struct.anon.15** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], %struct.anon.15* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [42 x %struct.S.12]*, [42 x %struct.S.12]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load [2 x %struct.S.12]*, [2 x %struct.S.12]** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.S.12*, %struct.S.12** [[TMP9]], align 8 +// CHECK1-NEXT: store %struct.S.12* [[TMP10]], %struct.S.12** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load %struct.S.12*, %struct.S.12** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.S.12* [[TMP11]], %struct.S.12** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [42 x %struct.S.0], [42 x %struct.S.0]* [[TMP0]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [42 x %struct.S.0], [42 x %struct.S.0]* [[TMP0]], i64 0, i64 40 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [40 x %struct.S.0], [40 x %struct.S.0]* [[ARR4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP6]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [42 x %struct.S.12], [42 x %struct.S.12]* [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [42 x %struct.S.12], [42 x %struct.S.12]* [[TMP2]], i64 0, i64 40 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [40 x %struct.S.12], [40 x %struct.S.12]* [[ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_12]], %struct.S.12* [[ARRAY_BEGIN]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S.12* [[ARRAY_BEGIN]], [[TMP12]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.12* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_12]], %struct.S.12* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.12* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast [42 x %struct.S.0]* [[TMP0]] to %struct.S.0* -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint %struct.S.0* [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint %struct.S.0* [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = sub i64 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: [[TMP11:%.*]] = sdiv exact i64 [[TMP10]], ptrtoint (%struct.S.0* getelementptr ([[STRUCT_S_0]], %struct.S.0* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast [40 x %struct.S.0]* [[ARR4]] to %struct.S.0* -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[TMP12]], i64 [[TMP11]] -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[TMP13]] to [42 x %struct.S.0]* -// CHECK1-NEXT: [[RHS_BEGIN:%.*]] = bitcast [40 x %struct.S.0]* [[ARR4]] to %struct.S.0* -// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast [42 x %struct.S.12]* [[TMP2]] to %struct.S.12* +// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint %struct.S.12* [[TMP13]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = ptrtoint %struct.S.12* [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP16:%.*]] = sub i64 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP17:%.*]] = sdiv exact i64 [[TMP16]], ptrtoint (%struct.S.12* getelementptr ([[STRUCT_S_12]], %struct.S.12* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast [40 x %struct.S.12]* [[ARR]] to %struct.S.12* +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr [[STRUCT_S_12]], %struct.S.12* [[TMP18]], i64 [[TMP17]] +// CHECK1-NEXT: [[TMP20:%.*]] = bitcast %struct.S.12* [[TMP19]] to [42 x %struct.S.12]* +// CHECK1-NEXT: [[RHS_BEGIN:%.*]] = bitcast [40 x %struct.S.12]* [[ARR]] to %struct.S.12* +// CHECK1-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP23]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP1]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP23]], i32* [[ARRAYIDX6]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP26]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP3]], i64 0, i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP27:%.*]] = bitcast %struct.S.0* [[ARRAYIDX8]] to i8* -// CHECK1-NEXT: [[TMP28:%.*]] = bitcast %struct.S.0* [[TMP25]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP27]], i8* align 4 [[TMP28]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP4]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP29]], i32* [[ARRAYIDX5]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load %struct.S.12*, %struct.S.12** [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP32]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.12], [2 x %struct.S.12]* [[TMP8]], i64 0, i64 [[IDXPROM6]] +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast %struct.S.12* [[ARRAYIDX7]] to i8* +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast %struct.S.12* [[TMP31]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP29]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP31]]) -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP33:%.*]] = bitcast %struct.S.0* [[RHS_BEGIN]] to i8* -// CHECK1-NEXT: store i8* [[TMP33]], i8** [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP37:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP35]], i32 1, i64 8, i8* [[TMP36]], void (i8*, i8*)* @.omp.reduction.reduction_func.30, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP37]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP36]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP37]]) +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP39:%.*]] = bitcast %struct.S.12* [[RHS_BEGIN]] to i8* +// CHECK1-NEXT: store i8* [[TMP39]], i8** [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP43:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP41]], i32 1, i64 8, i8* [[TMP42]], void (i8*, i8*)* @.omp.reduction.reduction_func.30, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP43]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAYIDX]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAYIDX]], [[TMP38]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE15:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr [[STRUCT_S_12]], %struct.S.12* [[ARRAYIDX]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.12* [[ARRAYIDX]], [[TMP44]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT13:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST10]]) -// CHECK1-NEXT: [[CALL11:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[CALL]], [[CALL11]] -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], i32 noundef [[ADD12]]) -// CHECK1-NEXT: [[TMP39:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST10]] to i8* -// CHECK1-NEXT: [[TMP40:%.*]] = bitcast %struct.S.0* [[REF_TMP]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP39]], i8* align 4 [[TMP40]], i64 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR5]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT13]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE14:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT13]], [[TMP38]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_DONE15]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done15: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP35]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.12* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST9:%.*]] = phi %struct.S.12* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST9]]) +// CHECK1-NEXT: [[CALL10:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[CALL]], [[CALL10]] +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], i32 noundef [[ADD11]]) +// CHECK1-NEXT: [[TMP45:%.*]] = bitcast %struct.S.12* [[OMP_ARRAYCPY_DESTELEMENTPAST9]] to i8* +// CHECK1-NEXT: [[TMP46:%.*]] = bitcast %struct.S.12* [[REF_TMP]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP45]], i8* align 4 [[TMP46]], i64 4, i1 false) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR5]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT12]] = getelementptr [[STRUCT_S_12]], %struct.S.12* [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_12]], %struct.S.12* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq %struct.S.12* [[OMP_ARRAYCPY_DEST_ELEMENT12]], [[TMP44]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done14: +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP41]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAYIDX]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY16:%.*]] = icmp eq %struct.S.0* [[ARRAYIDX]], [[TMP41]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY16]], label [[OMP_ARRAYCPY_DONE27:%.*]], label [[OMP_ARRAYCPY_BODY17:%.*]] -// CHECK1: omp.arraycpy.body17: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST18:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY17]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST19:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY17]] ] -// CHECK1-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP43]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL21:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST19]]) -// CHECK1-NEXT: [[CALL22:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST18]]) -// CHECK1-NEXT: [[ADD23:%.*]] = add nsw i32 [[CALL21]], [[CALL22]] -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[REF_TMP20]], i32 noundef [[ADD23]]) -// CHECK1-NEXT: [[TMP44:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST19]] to i8* -// CHECK1-NEXT: [[TMP45:%.*]] = bitcast %struct.S.0* [[REF_TMP20]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP44]], i8* align 4 [[TMP45]], i64 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[REF_TMP20]]) #[[ATTR5]] -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP43]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT24]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST19]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT25]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST18]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP41]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY17]] -// CHECK1: omp.arraycpy.done27: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP35]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr [[STRUCT_S_12]], %struct.S.12* [[ARRAYIDX]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY15:%.*]] = icmp eq %struct.S.12* [[ARRAYIDX]], [[TMP47]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY15]], label [[OMP_ARRAYCPY_DONE26:%.*]], label [[OMP_ARRAYCPY_BODY16:%.*]] +// CHECK1: omp.arraycpy.body16: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST17:%.*]] = phi %struct.S.12* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY16]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST18:%.*]] = phi %struct.S.12* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY16]] ] +// CHECK1-NEXT: [[TMP48:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[TMP48]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP49]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL20:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST18]]) +// CHECK1-NEXT: [[CALL21:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST17]]) +// CHECK1-NEXT: [[ADD22:%.*]] = add nsw i32 [[CALL20]], [[CALL21]] +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[REF_TMP19]], i32 noundef [[ADD22]]) +// CHECK1-NEXT: [[TMP50:%.*]] = bitcast %struct.S.12* [[OMP_ARRAYCPY_DESTELEMENTPAST18]] to i8* +// CHECK1-NEXT: [[TMP51:%.*]] = bitcast %struct.S.12* [[REF_TMP19]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP50]], i8* align 4 [[TMP51]], i64 4, i1 false) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[REF_TMP19]]) #[[ATTR5]] +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP49]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT23]] = getelementptr [[STRUCT_S_12]], %struct.S.12* [[OMP_ARRAYCPY_DESTELEMENTPAST18]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT24]] = getelementptr [[STRUCT_S_12]], %struct.S.12* [[OMP_ARRAYCPY_SRCELEMENTPAST17]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE25:%.*]] = icmp eq %struct.S.12* [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP47]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_BODY16]] +// CHECK1: omp.arraycpy.done26: +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP41]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN28:%.*]] = getelementptr inbounds [40 x %struct.S.0], [40 x %struct.S.0]* [[ARR4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN28]], i64 40 +// CHECK1-NEXT: [[ARRAY_BEGIN27:%.*]] = getelementptr inbounds [40 x %struct.S.12], [40 x %struct.S.12]* [[ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_S_12]], %struct.S.12* [[ARRAY_BEGIN27]], i64 40 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP46]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN28]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE29:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done29: -// CHECK1-NEXT: [[TMP47:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[TMP47]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP48]]) +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.12* [ [[TMP52]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_12]], %struct.S.12* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.12* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN27]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE28:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done28: +// CHECK1-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP54]]) // CHECK1-NEXT: ret void // // @@ -4367,7 +4473,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_12:%.*]], align 4 // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8 @@ -4376,39 +4482,39 @@ // CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]* // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.S.0* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.S.12* // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0 // CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to %struct.S.0* -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[TMP11]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to %struct.S.12* +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_12]], %struct.S.12* [[TMP11]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.12* [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP8]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP11]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]]) -// CHECK1-NEXT: [[CALL2:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.12* [ [[TMP8]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.12* [ [[TMP11]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]]) +// CHECK1-NEXT: [[CALL2:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL2]] -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], i32 noundef [[ADD]]) -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[REF_TMP]] to i8* +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], i32 noundef [[ADD]]) +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast %struct.S.12* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast %struct.S.12* [[REF_TMP]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR5]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR5]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_12]], %struct.S.12* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_12]], %struct.S.12* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.12* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done3: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK1-SAME: (%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.12*, align 8 +// CHECK1-NEXT: store %struct.S.12* [[THIS]], %struct.S.12** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.12*, %struct.S.12** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_12:%.*]], %struct.S.12* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load volatile double, double* @g, align 8 // CHECK1-NEXT: [[CONV:%.*]] = fptosi double [[TMP0]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[F]], align 4 @@ -4416,14 +4522,14 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK1-SAME: (%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.12*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.12* [[THIS]], %struct.S.12** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.12*, %struct.S.12** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_12:%.*]], %struct.S.12* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to double // CHECK1-NEXT: [[TMP1:%.*]] = load volatile double, double* @g, align 8 @@ -4434,11 +4540,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK1-SAME: (%struct.S.12* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.12*, align 8 +// CHECK1-NEXT: store %struct.S.12* [[THIS]], %struct.S.12** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.12*, %struct.S.12** [[THIS_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -4453,10 +4559,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -4472,96 +4579,98 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** @g1, align 8 -// CHECK3-NEXT: store double* [[TMP0]], double** [[TMP]], align 8 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** @g1, align 8 +// CHECK3-NEXT: store double* [[TMP1]], double** [[TMP]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: store double 0.000000e+00, double* [[G]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** @g1, align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load double*, double** @g1, align 8 // CHECK3-NEXT: store double 0.000000e+00, double* [[G1]], align 8 // CHECK3-NEXT: store double* [[G1]], double** [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK3-NEXT: store double 1.000000e+00, double* [[G]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = load double*, double** [[_TMP2]], align 8 -// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP10]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double* [[G]], double** [[TMP11]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP13:%.*]] = load double*, double** [[_TMP2]], align 8 -// CHECK3-NEXT: store double* [[TMP13]], double** [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load double*, double** [[_TMP2]], align 8 +// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP11]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G]], double** [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load double*, double** [[_TMP2]], align 8 +// CHECK3-NEXT: store double* [[TMP14]], double** [[TMP13]], align 8 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP16:%.*]] = bitcast double* [[G]] to i8* -// CHECK3-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK3-NEXT: [[TMP18:%.*]] = bitcast double* [[G1]] to i8* -// CHECK3-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 8 -// CHECK3-NEXT: [[TMP19:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 2, i64 16, i8* [[TMP19]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK3-NEXT: [[TMP17:%.*]] = bitcast double* [[G]] to i8* +// CHECK3-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK3-NEXT: [[TMP19:%.*]] = bitcast double* [[G1]] to i8* +// CHECK3-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 2, i64 16, i8* [[TMP20]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP21:%.*]] = load double, double* @g, align 8 -// CHECK3-NEXT: [[TMP22:%.*]] = load double, double* [[G]], align 8 -// CHECK3-NEXT: [[ADD5:%.*]] = fadd double [[TMP21]], [[TMP22]] +// CHECK3-NEXT: [[TMP22:%.*]] = load double, double* @g, align 8 +// CHECK3-NEXT: [[TMP23:%.*]] = load double, double* [[G]], align 8 +// CHECK3-NEXT: [[ADD5:%.*]] = fadd double [[TMP22]], [[TMP23]] // CHECK3-NEXT: store double [[ADD5]], double* @g, align 8 -// CHECK3-NEXT: [[TMP23:%.*]] = load double, double* [[TMP1]], align 8 -// CHECK3-NEXT: [[TMP24:%.*]] = load double, double* [[G1]], align 8 -// CHECK3-NEXT: [[ADD6:%.*]] = fadd double [[TMP23]], [[TMP24]] -// CHECK3-NEXT: store double [[ADD6]], double* [[TMP1]], align 8 -// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP24:%.*]] = load double, double* [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP25:%.*]] = load double, double* [[G1]], align 8 +// CHECK3-NEXT: [[ADD6:%.*]] = fadd double [[TMP24]], [[TMP25]] +// CHECK3-NEXT: store double [[ADD6]], double* [[TMP2]], align 8 +// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP25:%.*]] = load double, double* [[G]], align 8 -// CHECK3-NEXT: [[TMP26:%.*]] = atomicrmw fadd double* @g, double [[TMP25]] monotonic, align 8 -// CHECK3-NEXT: [[TMP27:%.*]] = load double, double* [[G1]], align 8 -// CHECK3-NEXT: [[TMP28:%.*]] = atomicrmw fadd double* [[TMP1]], double [[TMP27]] monotonic, align 8 -// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP26:%.*]] = load double, double* [[G]], align 8 +// CHECK3-NEXT: [[TMP27:%.*]] = atomicrmw fadd double* @g, double [[TMP26]] monotonic, align 8 +// CHECK3-NEXT: [[TMP28:%.*]] = load double, double* [[G1]], align 8 +// CHECK3-NEXT: [[TMP29:%.*]] = atomicrmw fadd double* [[TMP2]], double [[TMP28]] monotonic, align 8 +// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP3]]) +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -4615,18 +4724,20 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>*, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* // CHECK4-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>** [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -4642,46 +4753,48 @@ // CHECK4-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load double*, double** @g1, align 8 -// CHECK4-NEXT: store double* [[TMP0]], double** [[TMP]], align 8 +// CHECK4-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = load double*, double** @g1, align 8 +// CHECK4-NEXT: store double* [[TMP1]], double** [[TMP]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK4-NEXT: store double 0.000000e+00, double* [[G]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load double*, double** @g1, align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load double*, double** @g1, align 8 // CHECK4-NEXT: store double 0.000000e+00, double* [[G1]], align 8 // CHECK4-NEXT: store double* [[G1]], double** [[_TMP2]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK4-NEXT: store double 1.000000e+00, double* [[G]], align 8 -// CHECK4-NEXT: [[TMP10:%.*]] = load double*, double** [[_TMP2]], align 8 -// CHECK4-NEXT: store volatile double 1.000000e+00, double* [[TMP10]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = load double*, double** [[_TMP2]], align 8 +// CHECK4-NEXT: store volatile double 1.000000e+00, double* [[TMP11]], align 8 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, double* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, double* }>* [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** [[BLOCK_ISA]], align 8 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, double* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, double* }>* [[BLOCK]], i32 0, i32 1 @@ -4693,62 +4806,62 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, double* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, double* }>* [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp.1 to %struct.__block_descriptor*), %struct.__block_descriptor** [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, double* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, double* }>* [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP11:%.*]] = load volatile double, double* [[G]], align 8 -// CHECK4-NEXT: store volatile double [[TMP11]], double* [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load volatile double, double* [[G]], align 8 +// CHECK4-NEXT: store volatile double [[TMP12]], double* [[BLOCK_CAPTURED]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED4:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, double* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, double* }>* [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP12:%.*]] = load double*, double** [[_TMP2]], align 8 -// CHECK4-NEXT: store double* [[TMP12]], double** [[BLOCK_CAPTURED4]], align 8 -// CHECK4-NEXT: [[TMP13:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, double* }>* [[BLOCK]] to void ()* -// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP13]] to %struct.__block_literal_generic* -// CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP15:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* -// CHECK4-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP14]], align 8 -// CHECK4-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to void (i8*)* -// CHECK4-NEXT: call void [[TMP17]](i8* noundef [[TMP15]]) +// CHECK4-NEXT: [[TMP13:%.*]] = load double*, double** [[_TMP2]], align 8 +// CHECK4-NEXT: store double* [[TMP13]], double** [[BLOCK_CAPTURED4]], align 8 +// CHECK4-NEXT: [[TMP14:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, double* }>* [[BLOCK]] to void ()* +// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP14]] to %struct.__block_literal_generic* +// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP16:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* +// CHECK4-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP15]], align 8 +// CHECK4-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to void (i8*)* +// CHECK4-NEXT: call void [[TMP18]](i8* noundef [[TMP16]]) // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK4-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK4-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK4-NEXT: [[TMP20:%.*]] = bitcast double* [[G]] to i8* -// CHECK4-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK4-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK4-NEXT: [[TMP22:%.*]] = bitcast double* [[G1]] to i8* -// CHECK4-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 8 -// CHECK4-NEXT: [[TMP23:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK4-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 2, i64 16, i8* [[TMP23]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK4-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK4-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK4-NEXT: [[TMP21:%.*]] = bitcast double* [[G]] to i8* +// CHECK4-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8 +// CHECK4-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK4-NEXT: [[TMP23:%.*]] = bitcast double* [[G1]] to i8* +// CHECK4-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 +// CHECK4-NEXT: [[TMP24:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK4-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 2, i64 16, i8* [[TMP24]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK4-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK4-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK4-NEXT: ] // CHECK4: .omp.reduction.case1: -// CHECK4-NEXT: [[TMP25:%.*]] = load double, double* @g, align 8 -// CHECK4-NEXT: [[TMP26:%.*]] = load double, double* [[G]], align 8 -// CHECK4-NEXT: [[ADD6:%.*]] = fadd double [[TMP25]], [[TMP26]] +// CHECK4-NEXT: [[TMP26:%.*]] = load double, double* @g, align 8 +// CHECK4-NEXT: [[TMP27:%.*]] = load double, double* [[G]], align 8 +// CHECK4-NEXT: [[ADD6:%.*]] = fadd double [[TMP26]], [[TMP27]] // CHECK4-NEXT: store double [[ADD6]], double* @g, align 8 -// CHECK4-NEXT: [[TMP27:%.*]] = load double, double* [[TMP1]], align 8 -// CHECK4-NEXT: [[TMP28:%.*]] = load double, double* [[G1]], align 8 -// CHECK4-NEXT: [[ADD7:%.*]] = fadd double [[TMP27]], [[TMP28]] -// CHECK4-NEXT: store double [[ADD7]], double* [[TMP1]], align 8 -// CHECK4-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: [[TMP28:%.*]] = load double, double* [[TMP2]], align 8 +// CHECK4-NEXT: [[TMP29:%.*]] = load double, double* [[G1]], align 8 +// CHECK4-NEXT: [[ADD7:%.*]] = fadd double [[TMP28]], [[TMP29]] +// CHECK4-NEXT: store double [[ADD7]], double* [[TMP2]], align 8 +// CHECK4-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: .omp.reduction.case2: -// CHECK4-NEXT: [[TMP29:%.*]] = load double, double* [[G]], align 8 -// CHECK4-NEXT: [[TMP30:%.*]] = atomicrmw fadd double* @g, double [[TMP29]] monotonic, align 8 -// CHECK4-NEXT: [[TMP31:%.*]] = load double, double* [[G1]], align 8 -// CHECK4-NEXT: [[TMP32:%.*]] = atomicrmw fadd double* [[TMP1]], double [[TMP31]] monotonic, align 8 -// CHECK4-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: [[TMP30:%.*]] = load double, double* [[G]], align 8 +// CHECK4-NEXT: [[TMP31:%.*]] = atomicrmw fadd double* @g, double [[TMP30]] monotonic, align 8 +// CHECK4-NEXT: [[TMP32:%.*]] = load double, double* [[G1]], align 8 +// CHECK4-NEXT: [[TMP33:%.*]] = atomicrmw fadd double* [[TMP2]], double [[TMP32]] monotonic, align 8 +// CHECK4-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: .omp.reduction.default: -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP3]]) +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP4]]) // CHECK4-NEXT: ret void // // diff --git a/clang/test/OpenMP/for_reduction_codegen_UDR.cpp b/clang/test/OpenMP/for_reduction_codegen_UDR.cpp --- a/clang/test/OpenMP/for_reduction_codegen_UDR.cpp +++ b/clang/test/OpenMP/for_reduction_codegen_UDR.cpp @@ -479,8 +479,11 @@ // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(12) [[S]]) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.S* [[S]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.S* [[S]], %struct.S** [[TMP0]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(12) [[S]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: ret void // @@ -496,45 +499,47 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(12) [[S:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[S1:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.S* [[S]], %struct.S** [[S_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S*, %struct.S** [[S_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: call void @.omp_initializer.(%struct.S* noundef [[S1]], %struct.S* noundef [[TMP0]]) +// CHECK1-NEXT: call void @.omp_initializer.(%struct.S* noundef [[S]], %struct.S* noundef [[TMP2]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 10 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP3]], 10 // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !3 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP3]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: store i32 10, i32* [[I]], align 4 -// CHECK1-NEXT: call void @.omp_combiner.(%struct.S* noundef [[TMP0]], %struct.S* noundef [[S1]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(12) [[S1]]) #[[ATTR4]] +// CHECK1-NEXT: call void @.omp_combiner.(%struct.S* noundef [[TMP2]], %struct.S* noundef [[S]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(12) [[S]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -619,8 +624,15 @@ // CHECK1-NEXT: [[VAR2:%.*]] = alloca %struct.S.0**, align 8 // CHECK1-NEXT: [[VVAR2:%.*]] = alloca [5 x %struct.S.0], align 16 // CHECK1-NEXT: [[VAR3:%.*]] = alloca [4 x %struct.S.0]*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_10:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_11:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_12:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_13:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_14:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_15:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(12) [[TEST]]) // CHECK1-NEXT: store float 0.000000e+00, float* [[T_VAR]], align 4 @@ -659,62 +671,100 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE8]], label [[ARRAYCTOR_CONT9:%.*]], label [[ARRAYCTOR_LOOP5]] // CHECK1: arrayctor.cont9: // CHECK1-NEXT: store [4 x %struct.S.0]* [[S_ARR]], [4 x %struct.S.0]** [[VAR3]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, %struct.S.0*, %struct.S.0*, float*, [2 x i32]*, [4 x %struct.S.0]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), float* [[T_VAR]], %struct.S.0* [[TMP1]], %struct.S.0* [[VAR1]], float* [[T_VAR1]], [2 x i32]* [[VEC]], [4 x %struct.S.0]* [[S_ARR]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store float* [[T_VAR]], float** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR]], align 8 +// CHECK1-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.S.0* [[VAR1]], %struct.S.0** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[T_VAR1]], float** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store [4 x %struct.S.0]* [[S_ARR]], [4 x %struct.S.0]** [[TMP7]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 -// CHECK1-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave() -// CHECK1-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = mul nuw i64 10, [[TMP3]] -// CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP5]], align 16 -// CHECK1-NEXT: store i64 [[TMP3]], i64* [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, [2 x i32]*, [10 x [4 x %struct.S.0]]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 10, i64 [[TMP3]], i32* [[VLA]], [2 x i32]* [[VEC]], [10 x [4 x %struct.S.0]]* [[ARRS]]) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, [10 x [4 x %struct.S.0]]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 10, i64 [[TMP3]], i32* [[VLA]], [10 x [4 x %struct.S.0]]* [[ARRS]]) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S.0***)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.S.0*** [[VAR2]]) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [5 x %struct.S.0]*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), [5 x %struct.S.0]* [[VVAR2]]) -// CHECK1-NEXT: [[TMP6:%.*]] = load [4 x %struct.S.0]*, [4 x %struct.S.0]** [[VAR3]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [4 x %struct.S.0]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), [4 x %struct.S.0]* [[TMP6]]) -// CHECK1-NEXT: [[TMP7:%.*]] = load [4 x %struct.S.0]*, [4 x %struct.S.0]** [[VAR3]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [4 x %struct.S.0]*)* @.omp_outlined..21 to void (i32*, i32*, ...)*), [4 x %struct.S.0]* [[TMP7]]) -// CHECK1-NEXT: [[CALL10:%.*]] = call noundef i32 @_Z5tmainIiLi42EET_v() -// CHECK1-NEXT: store i32 [[CALL10]], i32* [[RETVAL]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP8]]) -// CHECK1-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [5 x %struct.S.0], [5 x %struct.S.0]* [[VVAR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN11]], i64 5 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave() +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = mul nuw i64 10, [[TMP9]] +// CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP11]], align 16 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_10]], i32 0, i32 0 +// CHECK1-NEXT: store i64 10, i64* [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_10]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_10]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[VLA]], i32** [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_10]], i32 0, i32 3 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_10]], i32 0, i32 4 +// CHECK1-NEXT: store [10 x [4 x %struct.S.0]]* [[ARRS]], [10 x [4 x %struct.S.0]]** [[TMP16]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_10]]) +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 0 +// CHECK1-NEXT: store i64 10, i64* [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[VLA]], i32** [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 3 +// CHECK1-NEXT: store [10 x [4 x %struct.S.0]]* [[ARRS]], [10 x [4 x %struct.S.0]]** [[TMP20]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_11]]) +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_12]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.S.0*** [[VAR2]], %struct.S.0**** [[TMP21]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_12]]) +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_13]], i32 0, i32 0 +// CHECK1-NEXT: store [5 x %struct.S.0]* [[VVAR2]], [5 x %struct.S.0]** [[TMP22]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_13]]) +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_14]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = load [4 x %struct.S.0]*, [4 x %struct.S.0]** [[VAR3]], align 8 +// CHECK1-NEXT: store [4 x %struct.S.0]* [[TMP24]], [4 x %struct.S.0]** [[TMP23]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_14]]) +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_15]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP26:%.*]] = load [4 x %struct.S.0]*, [4 x %struct.S.0]** [[VAR3]], align 8 +// CHECK1-NEXT: store [4 x %struct.S.0]* [[TMP26]], [4 x %struct.S.0]** [[TMP25]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..21 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_15]]) +// CHECK1-NEXT: [[CALL16:%.*]] = call noundef i32 @_Z5tmainIiLi42EET_v() +// CHECK1-NEXT: store i32 [[CALL16]], i32* [[RETVAL]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP27]]) +// CHECK1-NEXT: [[ARRAY_BEGIN17:%.*]] = getelementptr inbounds [5 x %struct.S.0], [5 x %struct.S.0]* [[VVAR2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN17]], i64 5 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP9]], [[ARRAYCTOR_CONT9]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP28]], [[ARRAYCTOR_CONT9]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done12: -// CHECK1-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], [10 x [4 x %struct.S.0]]* [[ARRS]], i32 0, i32 0, i32 0 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN13]], i64 40 -// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY14:%.*]] -// CHECK1: arraydestroy.body14: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST15:%.*]] = phi %struct.S.0* [ [[TMP10]], [[ARRAYDESTROY_DONE12]] ], [ [[ARRAYDESTROY_ELEMENT16:%.*]], [[ARRAYDESTROY_BODY14]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT16]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST15]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT16]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE17:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT16]], [[ARRAY_BEGIN13]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE17]], label [[ARRAYDESTROY_DONE18:%.*]], label [[ARRAYDESTROY_BODY14]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN17]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE18:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done18: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(12) [[VAR1]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN19:%.*]] = getelementptr inbounds [4 x %struct.S.0], [4 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN19]], i64 4 +// CHECK1-NEXT: [[ARRAY_BEGIN19:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], [10 x [4 x %struct.S.0]]* [[ARRS]], i32 0, i32 0, i32 0 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN19]], i64 40 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY20:%.*]] // CHECK1: arraydestroy.body20: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST21:%.*]] = phi %struct.S.0* [ [[TMP11]], [[ARRAYDESTROY_DONE18]] ], [ [[ARRAYDESTROY_ELEMENT22:%.*]], [[ARRAYDESTROY_BODY20]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST21:%.*]] = phi %struct.S.0* [ [[TMP29]], [[ARRAYDESTROY_DONE18]] ], [ [[ARRAYDESTROY_ELEMENT22:%.*]], [[ARRAYDESTROY_BODY20]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT22]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST21]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT22]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE23:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT22]], [[ARRAY_BEGIN19]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE23]], label [[ARRAYDESTROY_DONE24:%.*]], label [[ARRAYDESTROY_BODY20]] // CHECK1: arraydestroy.done24: +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(12) [[VAR1]]) #[[ATTR4]] +// CHECK1-NEXT: [[ARRAY_BEGIN25:%.*]] = getelementptr inbounds [4 x %struct.S.0], [4 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN25]], i64 4 +// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY26:%.*]] +// CHECK1: arraydestroy.body26: +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST27:%.*]] = phi %struct.S.0* [ [[TMP30]], [[ARRAYDESTROY_DONE24]] ], [ [[ARRAYDESTROY_ELEMENT28:%.*]], [[ARRAYDESTROY_BODY26]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT28]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST27]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT28]]) #[[ATTR4]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE29:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT28]], [[ARRAY_BEGIN25]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE29]], label [[ARRAYDESTROY_DONE30:%.*]], label [[ARRAYDESTROY_BODY26]] +// CHECK1: arraydestroy.done30: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(12) [[TEST]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP12]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP31]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ -741,16 +791,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(12) [[VAR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(12) [[VAR1:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[T_VAR1:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [4 x %struct.S.0]* noundef nonnull align 4 dereferenceable(48) [[S_ARR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca float*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[VAR1_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[T_VAR1_ADDR:%.*]] = alloca float*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [4 x %struct.S.0]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -759,155 +804,157 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR3:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[VAR17:%.*]] = alloca [[STRUCT_S_0]], align 4 -// CHECK1-NEXT: [[T_VAR18:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store float* [[T_VAR]], float** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[VAR1]], %struct.S.0** [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: store float* [[T_VAR1]], float** [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store [4 x %struct.S.0]* [[S_ARR]], [4 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load float*, float** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float*, float** [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load [4 x %struct.S.0]*, [4 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP1]], %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP6]], %struct.S.0** [[_TMP1]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load float*, float** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load [4 x %struct.S.0]*, [4 x %struct.S.0]** [[TMP11]], align 8 +// CHECK1-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.S.0* [[TMP13]], %struct.S.0** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @.omp_initializer..3(float* noundef [[T_VAR3]], float* noundef [[TMP0]]) -// CHECK1-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast %struct.S.0* [[VAR4]] to i8* -// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP8]], i64 4 -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct.S.0* [[TMP7]] to i8* -// CHECK1-NEXT: [[ADD_PTR5:%.*]] = getelementptr inbounds i8, i8* [[TMP10]], i64 4 -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast i8* [[ADD_PTR5]] to %struct.BaseS1* -// CHECK1-NEXT: call void @.omp_initializer..5(%struct.BaseS1* noundef [[TMP9]], %struct.BaseS1* noundef [[TMP11]]) -// CHECK1-NEXT: store %struct.S.0* [[VAR4]], %struct.S.0** [[_TMP6]], align 8 -// CHECK1-NEXT: call void @.omp_initializer..7(%struct.S.0* noundef [[VAR17]], %struct.S.0* noundef [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load float, float* @.init, align 4 -// CHECK1-NEXT: store float [[TMP12]], float* [[T_VAR18]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 +// CHECK1-NEXT: call void @.omp_initializer..3(float* noundef [[T_VAR]], float* noundef [[TMP2]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[VAR]] to i8* +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP15]], i64 4 +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast %struct.S.0* [[TMP14]] to i8* +// CHECK1-NEXT: [[ADD_PTR3:%.*]] = getelementptr inbounds i8, i8* [[TMP17]], i64 4 +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast i8* [[ADD_PTR3]] to %struct.BaseS1* +// CHECK1-NEXT: call void @.omp_initializer..5(%struct.BaseS1* noundef [[TMP16]], %struct.BaseS1* noundef [[TMP18]]) +// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP4]], align 8 +// CHECK1-NEXT: call void @.omp_initializer..7(%struct.S.0* noundef [[VAR1]], %struct.S.0* noundef [[TMP6]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load float, float* @.init, align 4 +// CHECK1-NEXT: store float [[TMP19]], float* [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP21]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP22]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load float, float* [[T_VAR3]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = fptosi float [[TMP21]] to i32 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP4]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP28:%.*]] = load float, float* [[T_VAR]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = fptosi float [[TMP28]] to i32 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP10]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 [[CONV]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [4 x %struct.S.0], [4 x %struct.S.0]* [[TMP5]], i64 0, i64 [[IDXPROM10]] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(12) %struct.S.0* @_ZN1SIfEaSERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(12) [[ARRAYIDX11]], %struct.S.0* noundef nonnull align 4 dereferenceable(12) [[TMP23]]) +// CHECK1-NEXT: [[TMP30:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [4 x %struct.S.0], [4 x %struct.S.0]* [[TMP12]], i64 0, i64 [[IDXPROM6]] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(12) %struct.S.0* @_ZN1SIfEaSERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(12) [[ARRAYIDX7]], %struct.S.0* noundef nonnull align 4 dereferenceable(12) [[TMP30]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP25]], 1 -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP14]]) -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP27:%.*]] = bitcast float* [[T_VAR3]] to i8* -// CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP29:%.*]] = bitcast %struct.S.0* [[VAR4]] to i8* -// CHECK1-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP31:%.*]] = bitcast %struct.S.0* [[VAR17]] to i8* -// CHECK1-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP33:%.*]] = bitcast float* [[T_VAR18]] to i8* -// CHECK1-NEXT: store i8* [[TMP33]], i8** [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = bitcast [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP35:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP14]], i32 4, i64 32, i8* [[TMP34]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP35]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP21]]) +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast float* [[T_VAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP36:%.*]] = bitcast %struct.S.0* [[VAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP36]], i8** [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP38:%.*]] = bitcast %struct.S.0* [[VAR1]] to i8* +// CHECK1-NEXT: store i8* [[TMP38]], i8** [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 +// CHECK1-NEXT: [[TMP40:%.*]] = bitcast float* [[T_VAR1]] to i8* +// CHECK1-NEXT: store i8* [[TMP40]], i8** [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = bitcast [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP42:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP21]], i32 4, i64 32, i8* [[TMP41]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP42]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: call void @.omp_combiner..2(float* noundef [[TMP0]], float* noundef [[T_VAR3]]) -// CHECK1-NEXT: [[TMP36:%.*]] = bitcast %struct.S.0* [[TMP7]] to i8* -// CHECK1-NEXT: [[ADD_PTR13:%.*]] = getelementptr inbounds i8, i8* [[TMP36]], i64 4 -// CHECK1-NEXT: [[TMP37:%.*]] = bitcast i8* [[ADD_PTR13]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP38:%.*]] = bitcast %struct.S.0* [[VAR4]] to i8* -// CHECK1-NEXT: [[ADD_PTR14:%.*]] = getelementptr inbounds i8, i8* [[TMP38]], i64 4 -// CHECK1-NEXT: [[TMP39:%.*]] = bitcast i8* [[ADD_PTR14]] to %struct.BaseS1* -// CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* noundef [[TMP37]], %struct.BaseS1* noundef [[TMP39]]) -// CHECK1-NEXT: call void @.omp_combiner..6(%struct.S.0* noundef [[TMP2]], %struct.S.0* noundef [[VAR17]]) -// CHECK1-NEXT: call void @.omp_combiner..8(float* noundef [[TMP3]], float* noundef [[T_VAR18]]) -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..2(float* noundef [[TMP2]], float* noundef [[T_VAR]]) +// CHECK1-NEXT: [[TMP43:%.*]] = bitcast %struct.S.0* [[TMP14]] to i8* +// CHECK1-NEXT: [[ADD_PTR9:%.*]] = getelementptr inbounds i8, i8* [[TMP43]], i64 4 +// CHECK1-NEXT: [[TMP44:%.*]] = bitcast i8* [[ADD_PTR9]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP45:%.*]] = bitcast %struct.S.0* [[VAR]] to i8* +// CHECK1-NEXT: [[ADD_PTR10:%.*]] = getelementptr inbounds i8, i8* [[TMP45]], i64 4 +// CHECK1-NEXT: [[TMP46:%.*]] = bitcast i8* [[ADD_PTR10]] to %struct.BaseS1* +// CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* noundef [[TMP44]], %struct.BaseS1* noundef [[TMP46]]) +// CHECK1-NEXT: call void @.omp_combiner..6(%struct.S.0* noundef [[TMP6]], %struct.S.0* noundef [[VAR1]]) +// CHECK1-NEXT: call void @.omp_combiner..8(float* noundef [[TMP8]], float* noundef [[T_VAR1]]) +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP21]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @.omp_combiner..2(float* noundef [[TMP0]], float* noundef [[T_VAR3]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[TMP40:%.*]] = bitcast %struct.S.0* [[TMP7]] to i8* -// CHECK1-NEXT: [[ADD_PTR15:%.*]] = getelementptr inbounds i8, i8* [[TMP40]], i64 4 -// CHECK1-NEXT: [[TMP41:%.*]] = bitcast i8* [[ADD_PTR15]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP42:%.*]] = bitcast %struct.S.0* [[VAR4]] to i8* -// CHECK1-NEXT: [[ADD_PTR16:%.*]] = getelementptr inbounds i8, i8* [[TMP42]], i64 4 -// CHECK1-NEXT: [[TMP43:%.*]] = bitcast i8* [[ADD_PTR16]] to %struct.BaseS1* -// CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* noundef [[TMP41]], %struct.BaseS1* noundef [[TMP43]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @.omp_combiner..6(%struct.S.0* noundef [[TMP2]], %struct.S.0* noundef [[VAR17]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @.omp_combiner..8(float* noundef [[TMP3]], float* noundef [[T_VAR18]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..2(float* noundef [[TMP2]], float* noundef [[T_VAR]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP47:%.*]] = bitcast %struct.S.0* [[TMP14]] to i8* +// CHECK1-NEXT: [[ADD_PTR11:%.*]] = getelementptr inbounds i8, i8* [[TMP47]], i64 4 +// CHECK1-NEXT: [[TMP48:%.*]] = bitcast i8* [[ADD_PTR11]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP49:%.*]] = bitcast %struct.S.0* [[VAR]] to i8* +// CHECK1-NEXT: [[ADD_PTR12:%.*]] = getelementptr inbounds i8, i8* [[TMP49]], i64 4 +// CHECK1-NEXT: [[TMP50:%.*]] = bitcast i8* [[ADD_PTR12]] to %struct.BaseS1* +// CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* noundef [[TMP48]], %struct.BaseS1* noundef [[TMP50]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..6(%struct.S.0* noundef [[TMP6]], %struct.S.0* noundef [[VAR1]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..8(float* noundef [[TMP8]], float* noundef [[T_VAR1]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP21]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(12) [[VAR17]]) #[[ATTR4]] -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(12) [[VAR4]]) #[[ATTR4]] -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP14]]) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(12) [[VAR1]]) #[[ATTR4]] +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(12) [[VAR]]) #[[ATTR4]] +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP21]]) // CHECK1-NEXT: ret void // // @@ -1076,15 +1123,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [10 x [4 x %struct.S.0]]* noundef nonnull align 4 dereferenceable(480) [[ARRS:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[ARRS_ADDR:%.*]] = alloca [10 x [4 x %struct.S.0]]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1098,257 +1141,259 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store i32* [[ARR]], i32** [[ARR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store [10 x [4 x %struct.S.0]]* [[ARRS]], [10 x [4 x %struct.S.0]]** [[ARRS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load [10 x [4 x %struct.S.0]]*, [10 x [4 x %struct.S.0]]** [[ARRS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load [10 x [4 x %struct.S.0]]*, [10 x [4 x %struct.S.0]]** [[TMP9]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = mul nsw i64 1, [[TMP1]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[TMP5]] -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i64 0 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX4]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP7]] -// CHECK1-NEXT: [[TMP8:%.*]] = mul nsw i64 1, [[TMP1]] -// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[TMP8]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX5]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint i32* [[ARRAYIDX6]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint i32* [[ARRAYIDX3]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: [[TMP12:%.*]] = sdiv exact i64 [[TMP11]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = add nuw i64 [[TMP12]], 1 -// CHECK1-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP15:%.*]] = call i8* @llvm.stacksave() -// CHECK1-NEXT: store i8* [[TMP15]], i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA7:%.*]] = alloca i32, i64 [[TMP13]], align 16 -// CHECK1-NEXT: store i64 [[TMP13]], i64* [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr i32, i32* [[VLA7]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[VLA7]], [[TMP16]] +// CHECK1-NEXT: [[TMP11:%.*]] = mul nsw i64 1, [[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i64 [[TMP11]] +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP8]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP13]] +// CHECK1-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i64 [[TMP14]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX3]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP15:%.*]] = ptrtoint i32* [[ARRAYIDX4]] to i64 +// CHECK1-NEXT: [[TMP16:%.*]] = ptrtoint i32* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP17:%.*]] = sub i64 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP18:%.*]] = sdiv exact i64 [[TMP17]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP19:%.*]] = add nuw i64 [[TMP18]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = mul nuw i64 [[TMP19]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP21:%.*]] = call i8* @llvm.stacksave() +// CHECK1-NEXT: store i8* [[TMP21]], i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP19]], align 16 +// CHECK1-NEXT: store i64 [[TMP19]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr i32, i32* [[VLA]], i64 [[TMP19]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[VLA]], [[TMP22]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i32* [ [[ARRAYIDX3]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i32* [ [[VLA7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i32* [ [[ARRAYIDX1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i32* [ [[VLA]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT5:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: call void @.omp_initializer..11(i32* noundef [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32* noundef [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP16]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT5]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP22]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint i32* [[TMP2]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint i32* [[ARRAYIDX3]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[VLA7]], i64 [[TMP20]] -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], [10 x [4 x %struct.S.0]]* [[TMP4]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x %struct.S.0], [4 x %struct.S.0]* [[ARRAYIDX9]], i64 0, i64 0 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAYDECAY]], i64 1 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX11]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN12:%.*]] = add nsw i64 0, [[TMP23]] -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], [10 x [4 x %struct.S.0]]* [[TMP4]], i64 0, i64 [[LB_ADD_LEN12]] -// CHECK1-NEXT: [[ARRAYDECAY14:%.*]] = getelementptr inbounds [4 x %struct.S.0], [4 x %struct.S.0]* [[ARRAYIDX13]], i64 0, i64 0 -// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDECAY14]], i64 2 -// CHECK1-NEXT: [[TMP24:%.*]] = ptrtoint %struct.S.0* [[ARRAYIDX15]] to i64 -// CHECK1-NEXT: [[TMP25:%.*]] = ptrtoint %struct.S.0* [[ARRAYIDX10]] to i64 -// CHECK1-NEXT: [[TMP26:%.*]] = sub i64 [[TMP24]], [[TMP25]] -// CHECK1-NEXT: [[TMP27:%.*]] = sdiv exact i64 [[TMP26]], ptrtoint (%struct.S.0* getelementptr ([[STRUCT_S_0]], %struct.S.0* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP28:%.*]] = add nuw i64 [[TMP27]], 1 -// CHECK1-NEXT: [[TMP29:%.*]] = mul nuw i64 [[TMP28]], ptrtoint (%struct.S.0* getelementptr ([[STRUCT_S_0]], %struct.S.0* null, i32 1) to i64) -// CHECK1-NEXT: [[VLA16:%.*]] = alloca [[STRUCT_S_0]], i64 [[TMP28]], align 16 -// CHECK1-NEXT: store i64 [[TMP28]], i64* [[__VLA_EXPR1]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[VLA16]], i64 [[TMP28]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY17:%.*]] = icmp eq %struct.S.0* [[VLA16]], [[TMP30]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY17]], label [[OMP_ARRAYINIT_DONE25:%.*]], label [[OMP_ARRAYINIT_BODY18:%.*]] -// CHECK1: omp.arrayinit.body18: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST19:%.*]] = phi %struct.S.0* [ [[ARRAYIDX10]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT22:%.*]], [[OMP_ARRAYINIT_BODY18]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST20:%.*]] = phi %struct.S.0* [ [[VLA16]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT23:%.*]], [[OMP_ARRAYINIT_BODY18]] ] -// CHECK1-NEXT: [[TMP31:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST20]] to i8* -// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP31]], i64 4 -// CHECK1-NEXT: [[TMP32:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP33:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST19]] to i8* -// CHECK1-NEXT: [[ADD_PTR21:%.*]] = getelementptr inbounds i8, i8* [[TMP33]], i64 4 -// CHECK1-NEXT: [[TMP34:%.*]] = bitcast i8* [[ADD_PTR21]] to %struct.BaseS1* -// CHECK1-NEXT: call void @.omp_initializer..5(%struct.BaseS1* noundef [[TMP32]], %struct.BaseS1* noundef [[TMP34]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT22]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST19]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT23]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST20]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE24:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP30]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE24]], label [[OMP_ARRAYINIT_DONE25]], label [[OMP_ARRAYINIT_BODY18]] -// CHECK1: omp.arrayinit.done25: -// CHECK1-NEXT: [[TMP35:%.*]] = bitcast [10 x [4 x %struct.S.0]]* [[TMP4]] to %struct.S.0* -// CHECK1-NEXT: [[TMP36:%.*]] = ptrtoint %struct.S.0* [[TMP35]] to i64 -// CHECK1-NEXT: [[TMP37:%.*]] = ptrtoint %struct.S.0* [[ARRAYIDX10]] to i64 -// CHECK1-NEXT: [[TMP38:%.*]] = sub i64 [[TMP36]], [[TMP37]] -// CHECK1-NEXT: [[TMP39:%.*]] = sdiv exact i64 [[TMP38]], ptrtoint (%struct.S.0* getelementptr ([[STRUCT_S_0]], %struct.S.0* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[VLA16]], i64 [[TMP39]] -// CHECK1-NEXT: [[TMP41:%.*]] = bitcast %struct.S.0* [[TMP40]] to [10 x [4 x %struct.S.0]]* -// CHECK1-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP43]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP44]], 9 +// CHECK1-NEXT: [[TMP23:%.*]] = ptrtoint i32* [[TMP6]] to i64 +// CHECK1-NEXT: [[TMP24:%.*]] = ptrtoint i32* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP25:%.*]] = sub i64 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: [[TMP26:%.*]] = sdiv exact i64 [[TMP25]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr i32, i32* [[VLA]], i64 [[TMP26]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], [10 x [4 x %struct.S.0]]* [[TMP10]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x %struct.S.0], [4 x %struct.S.0]* [[ARRAYIDX6]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAYDECAY]], i64 1 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP8]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 0, [[TMP29]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], [10 x [4 x %struct.S.0]]* [[TMP10]], i64 0, i64 [[LB_ADD_LEN9]] +// CHECK1-NEXT: [[ARRAYDECAY11:%.*]] = getelementptr inbounds [4 x %struct.S.0], [4 x %struct.S.0]* [[ARRAYIDX10]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDECAY11]], i64 2 +// CHECK1-NEXT: [[TMP30:%.*]] = ptrtoint %struct.S.0* [[ARRAYIDX12]] to i64 +// CHECK1-NEXT: [[TMP31:%.*]] = ptrtoint %struct.S.0* [[ARRAYIDX7]] to i64 +// CHECK1-NEXT: [[TMP32:%.*]] = sub i64 [[TMP30]], [[TMP31]] +// CHECK1-NEXT: [[TMP33:%.*]] = sdiv exact i64 [[TMP32]], ptrtoint (%struct.S.0* getelementptr ([[STRUCT_S_0]], %struct.S.0* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP34:%.*]] = add nuw i64 [[TMP33]], 1 +// CHECK1-NEXT: [[TMP35:%.*]] = mul nuw i64 [[TMP34]], ptrtoint (%struct.S.0* getelementptr ([[STRUCT_S_0]], %struct.S.0* null, i32 1) to i64) +// CHECK1-NEXT: [[VLA13:%.*]] = alloca [[STRUCT_S_0]], i64 [[TMP34]], align 16 +// CHECK1-NEXT: store i64 [[TMP34]], i64* [[__VLA_EXPR1]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[VLA13]], i64 [[TMP34]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY14:%.*]] = icmp eq %struct.S.0* [[VLA13]], [[TMP36]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY14]], label [[OMP_ARRAYINIT_DONE22:%.*]], label [[OMP_ARRAYINIT_BODY15:%.*]] +// CHECK1: omp.arrayinit.body15: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST16:%.*]] = phi %struct.S.0* [ [[ARRAYIDX7]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYINIT_BODY15]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST17:%.*]] = phi %struct.S.0* [ [[VLA13]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYINIT_BODY15]] ] +// CHECK1-NEXT: [[TMP37:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST17]] to i8* +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP37]], i64 4 +// CHECK1-NEXT: [[TMP38:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP39:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST16]] to i8* +// CHECK1-NEXT: [[ADD_PTR18:%.*]] = getelementptr inbounds i8, i8* [[TMP39]], i64 4 +// CHECK1-NEXT: [[TMP40:%.*]] = bitcast i8* [[ADD_PTR18]] to %struct.BaseS1* +// CHECK1-NEXT: call void @.omp_initializer..5(%struct.BaseS1* noundef [[TMP38]], %struct.BaseS1* noundef [[TMP40]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST16]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST17]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP36]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYINIT_DONE22]], label [[OMP_ARRAYINIT_BODY15]] +// CHECK1: omp.arrayinit.done22: +// CHECK1-NEXT: [[TMP41:%.*]] = bitcast [10 x [4 x %struct.S.0]]* [[TMP10]] to %struct.S.0* +// CHECK1-NEXT: [[TMP42:%.*]] = ptrtoint %struct.S.0* [[TMP41]] to i64 +// CHECK1-NEXT: [[TMP43:%.*]] = ptrtoint %struct.S.0* [[ARRAYIDX7]] to i64 +// CHECK1-NEXT: [[TMP44:%.*]] = sub i64 [[TMP42]], [[TMP43]] +// CHECK1-NEXT: [[TMP45:%.*]] = sdiv exact i64 [[TMP44]], ptrtoint (%struct.S.0* getelementptr ([[STRUCT_S_0]], %struct.S.0* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[VLA13]], i64 [[TMP45]] +// CHECK1-NEXT: [[TMP47:%.*]] = bitcast %struct.S.0* [[TMP46]] to [10 x [4 x %struct.S.0]]* +// CHECK1-NEXT: [[TMP48:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[TMP48]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP49]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP50]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP45]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP51]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP46]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP52]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP26:%.*]] = icmp sle i32 [[TMP47]], [[TMP48]] -// CHECK1-NEXT: br i1 [[CMP26]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP53]], [[TMP54]] +// CHECK1-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP49]], 1 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP55]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP50:%.*]] = mul nsw i64 1, [[TMP1]] -// CHECK1-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds i32, i32* [[TMP21]], i64 [[TMP50]] -// CHECK1-NEXT: [[TMP51:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP51]] to i64 -// CHECK1-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX27]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[ARRAYIDX28]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP52]], 1 -// CHECK1-NEXT: store i32 [[INC]], i32* [[ARRAYIDX28]], align 4 +// CHECK1-NEXT: [[TMP56:%.*]] = mul nsw i64 1, [[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, i32* [[TMP27]], i64 [[TMP56]] +// CHECK1-NEXT: [[TMP57:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP57]] to i64 +// CHECK1-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX24]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP58:%.*]] = load i32, i32* [[ARRAYIDX25]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP58]], 1 +// CHECK1-NEXT: store i32 [[INC]], i32* [[ARRAYIDX25]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP53]], 1 -// CHECK1-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP59:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP59]], 1 +// CHECK1-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP54:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP55:%.*]] = load i32, i32* [[TMP54]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP55]]) -// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP57:%.*]] = bitcast i32* [[VLA7]] to i8* -// CHECK1-NEXT: store i8* [[TMP57]], i8** [[TMP56]], align 8 -// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP59:%.*]] = inttoptr i64 [[TMP13]] to i8* -// CHECK1-NEXT: store i8* [[TMP59]], i8** [[TMP58]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP61:%.*]] = bitcast %struct.S.0* [[VLA16]] to i8* -// CHECK1-NEXT: store i8* [[TMP61]], i8** [[TMP60]], align 8 -// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP63:%.*]] = inttoptr i64 [[TMP28]] to i8* +// CHECK1-NEXT: [[TMP60:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = load i32, i32* [[TMP60]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP61]]) +// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP63:%.*]] = bitcast i32* [[VLA]] to i8* // CHECK1-NEXT: store i8* [[TMP63]], i8** [[TMP62]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = load i32, i32* [[TMP64]], align 4 -// CHECK1-NEXT: [[TMP66:%.*]] = bitcast [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP67:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP65]], i32 2, i64 32, i8* [[TMP66]], void (i8*, i8*)* @.omp.reduction.reduction_func.12, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP67]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP65:%.*]] = inttoptr i64 [[TMP19]] to i8* +// CHECK1-NEXT: store i8* [[TMP65]], i8** [[TMP64]], align 8 +// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP67:%.*]] = bitcast %struct.S.0* [[VLA13]] to i8* +// CHECK1-NEXT: store i8* [[TMP67]], i8** [[TMP66]], align 8 +// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 +// CHECK1-NEXT: [[TMP69:%.*]] = inttoptr i64 [[TMP34]] to i8* +// CHECK1-NEXT: store i8* [[TMP69]], i8** [[TMP68]], align 8 +// CHECK1-NEXT: [[TMP70:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP71:%.*]] = load i32, i32* [[TMP70]], align 4 +// CHECK1-NEXT: [[TMP72:%.*]] = bitcast [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP73:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP71]], i32 2, i64 32, i8* [[TMP72]], void (i8*, i8*)* @.omp.reduction.reduction_func.12, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP73]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr i32, i32* [[ARRAYIDX3]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[ARRAYIDX3]], [[TMP68]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE34:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr i32, i32* [[ARRAYIDX1]], i64 [[TMP19]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[ARRAYIDX1]], [[TMP74]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE31:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST30:%.*]] = phi i32* [ [[VLA7]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST31:%.*]] = phi i32* [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: call void @.omp_combiner..10(i32* noundef [[OMP_ARRAYCPY_DESTELEMENTPAST31]], i32* noundef [[OMP_ARRAYCPY_SRCELEMENTPAST30]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST31]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST30]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE33:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP68]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE33]], label [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done34: -// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAYIDX10]], i64 [[TMP28]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY35:%.*]] = icmp eq %struct.S.0* [[ARRAYIDX10]], [[TMP69]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY35]], label [[OMP_ARRAYCPY_DONE44:%.*]], label [[OMP_ARRAYCPY_BODY36:%.*]] -// CHECK1: omp.arraycpy.body36: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST37:%.*]] = phi %struct.S.0* [ [[VLA16]], [[OMP_ARRAYCPY_DONE34]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT42:%.*]], [[OMP_ARRAYCPY_BODY36]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST38:%.*]] = phi %struct.S.0* [ [[ARRAYIDX10]], [[OMP_ARRAYCPY_DONE34]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT41:%.*]], [[OMP_ARRAYCPY_BODY36]] ] -// CHECK1-NEXT: [[TMP70:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST38]] to i8* -// CHECK1-NEXT: [[ADD_PTR39:%.*]] = getelementptr inbounds i8, i8* [[TMP70]], i64 4 -// CHECK1-NEXT: [[TMP71:%.*]] = bitcast i8* [[ADD_PTR39]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP72:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST37]] to i8* -// CHECK1-NEXT: [[ADD_PTR40:%.*]] = getelementptr inbounds i8, i8* [[TMP72]], i64 4 -// CHECK1-NEXT: [[TMP73:%.*]] = bitcast i8* [[ADD_PTR40]] to %struct.BaseS1* -// CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* noundef [[TMP71]], %struct.BaseS1* noundef [[TMP73]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT41]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST38]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT42]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST37]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE43:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT41]], [[TMP69]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE43]], label [[OMP_ARRAYCPY_DONE44]], label [[OMP_ARRAYCPY_BODY36]] -// CHECK1: omp.arraycpy.done44: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP65]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST27:%.*]] = phi i32* [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST28:%.*]] = phi i32* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT29:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: call void @.omp_combiner..10(i32* noundef [[OMP_ARRAYCPY_DESTELEMENTPAST28]], i32* noundef [[OMP_ARRAYCPY_SRCELEMENTPAST27]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT29]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST28]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST27]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE30:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT29]], [[TMP74]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE30]], label [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done31: +// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAYIDX7]], i64 [[TMP34]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY32:%.*]] = icmp eq %struct.S.0* [[ARRAYIDX7]], [[TMP75]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY32]], label [[OMP_ARRAYCPY_DONE41:%.*]], label [[OMP_ARRAYCPY_BODY33:%.*]] +// CHECK1: omp.arraycpy.body33: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST34:%.*]] = phi %struct.S.0* [ [[VLA13]], [[OMP_ARRAYCPY_DONE31]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT39:%.*]], [[OMP_ARRAYCPY_BODY33]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST35:%.*]] = phi %struct.S.0* [ [[ARRAYIDX7]], [[OMP_ARRAYCPY_DONE31]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT38:%.*]], [[OMP_ARRAYCPY_BODY33]] ] +// CHECK1-NEXT: [[TMP76:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST35]] to i8* +// CHECK1-NEXT: [[ADD_PTR36:%.*]] = getelementptr inbounds i8, i8* [[TMP76]], i64 4 +// CHECK1-NEXT: [[TMP77:%.*]] = bitcast i8* [[ADD_PTR36]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP78:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST34]] to i8* +// CHECK1-NEXT: [[ADD_PTR37:%.*]] = getelementptr inbounds i8, i8* [[TMP78]], i64 4 +// CHECK1-NEXT: [[TMP79:%.*]] = bitcast i8* [[ADD_PTR37]] to %struct.BaseS1* +// CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* noundef [[TMP77]], %struct.BaseS1* noundef [[TMP79]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT38]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST35]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT39]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST34]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE40:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT38]], [[TMP75]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE40]], label [[OMP_ARRAYCPY_DONE41]], label [[OMP_ARRAYCPY_BODY33]] +// CHECK1: omp.arraycpy.done41: +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP71]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr i32, i32* [[ARRAYIDX3]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY45:%.*]] = icmp eq i32* [[ARRAYIDX3]], [[TMP74]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY45]], label [[OMP_ARRAYCPY_DONE52:%.*]], label [[OMP_ARRAYCPY_BODY46:%.*]] -// CHECK1: omp.arraycpy.body46: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST47:%.*]] = phi i32* [ [[VLA7]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT50:%.*]], [[OMP_ARRAYCPY_BODY46]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST48:%.*]] = phi i32* [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT49:%.*]], [[OMP_ARRAYCPY_BODY46]] ] -// CHECK1-NEXT: [[TMP75:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP76:%.*]] = load i32, i32* [[TMP75]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP76]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @.omp_combiner..10(i32* noundef [[OMP_ARRAYCPY_DESTELEMENTPAST48]], i32* noundef [[OMP_ARRAYCPY_SRCELEMENTPAST47]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP76]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT49]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST48]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT50]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST47]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE51:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT49]], [[TMP74]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE51]], label [[OMP_ARRAYCPY_DONE52]], label [[OMP_ARRAYCPY_BODY46]] -// CHECK1: omp.arraycpy.done52: -// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAYIDX10]], i64 [[TMP28]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY53:%.*]] = icmp eq %struct.S.0* [[ARRAYIDX10]], [[TMP77]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY53]], label [[OMP_ARRAYCPY_DONE62:%.*]], label [[OMP_ARRAYCPY_BODY54:%.*]] -// CHECK1: omp.arraycpy.body54: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST55:%.*]] = phi %struct.S.0* [ [[VLA16]], [[OMP_ARRAYCPY_DONE52]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT60:%.*]], [[OMP_ARRAYCPY_BODY54]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST56:%.*]] = phi %struct.S.0* [ [[ARRAYIDX10]], [[OMP_ARRAYCPY_DONE52]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT59:%.*]], [[OMP_ARRAYCPY_BODY54]] ] -// CHECK1-NEXT: [[TMP78:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP79:%.*]] = load i32, i32* [[TMP78]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP79]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[TMP80:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST56]] to i8* -// CHECK1-NEXT: [[ADD_PTR57:%.*]] = getelementptr inbounds i8, i8* [[TMP80]], i64 4 -// CHECK1-NEXT: [[TMP81:%.*]] = bitcast i8* [[ADD_PTR57]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP82:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST55]] to i8* -// CHECK1-NEXT: [[ADD_PTR58:%.*]] = getelementptr inbounds i8, i8* [[TMP82]], i64 4 -// CHECK1-NEXT: [[TMP83:%.*]] = bitcast i8* [[ADD_PTR58]] to %struct.BaseS1* -// CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* noundef [[TMP81]], %struct.BaseS1* noundef [[TMP83]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP79]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT59]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST56]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT60]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST55]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE61:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT59]], [[TMP77]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE61]], label [[OMP_ARRAYCPY_DONE62]], label [[OMP_ARRAYCPY_BODY54]] -// CHECK1: omp.arraycpy.done62: +// CHECK1-NEXT: [[TMP80:%.*]] = getelementptr i32, i32* [[ARRAYIDX1]], i64 [[TMP19]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY42:%.*]] = icmp eq i32* [[ARRAYIDX1]], [[TMP80]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY42]], label [[OMP_ARRAYCPY_DONE49:%.*]], label [[OMP_ARRAYCPY_BODY43:%.*]] +// CHECK1: omp.arraycpy.body43: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST44:%.*]] = phi i32* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT47:%.*]], [[OMP_ARRAYCPY_BODY43]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST45:%.*]] = phi i32* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT46:%.*]], [[OMP_ARRAYCPY_BODY43]] ] +// CHECK1-NEXT: [[TMP81:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP82:%.*]] = load i32, i32* [[TMP81]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP82]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..10(i32* noundef [[OMP_ARRAYCPY_DESTELEMENTPAST45]], i32* noundef [[OMP_ARRAYCPY_SRCELEMENTPAST44]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP82]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT46]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST45]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT47]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST44]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE48:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT46]], [[TMP80]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE48]], label [[OMP_ARRAYCPY_DONE49]], label [[OMP_ARRAYCPY_BODY43]] +// CHECK1: omp.arraycpy.done49: +// CHECK1-NEXT: [[TMP83:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAYIDX7]], i64 [[TMP34]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY50:%.*]] = icmp eq %struct.S.0* [[ARRAYIDX7]], [[TMP83]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY50]], label [[OMP_ARRAYCPY_DONE59:%.*]], label [[OMP_ARRAYCPY_BODY51:%.*]] +// CHECK1: omp.arraycpy.body51: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST52:%.*]] = phi %struct.S.0* [ [[VLA13]], [[OMP_ARRAYCPY_DONE49]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT57:%.*]], [[OMP_ARRAYCPY_BODY51]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST53:%.*]] = phi %struct.S.0* [ [[ARRAYIDX7]], [[OMP_ARRAYCPY_DONE49]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT56:%.*]], [[OMP_ARRAYCPY_BODY51]] ] +// CHECK1-NEXT: [[TMP84:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP85:%.*]] = load i32, i32* [[TMP84]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP85]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP86:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST53]] to i8* +// CHECK1-NEXT: [[ADD_PTR54:%.*]] = getelementptr inbounds i8, i8* [[TMP86]], i64 4 +// CHECK1-NEXT: [[TMP87:%.*]] = bitcast i8* [[ADD_PTR54]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP88:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST52]] to i8* +// CHECK1-NEXT: [[ADD_PTR55:%.*]] = getelementptr inbounds i8, i8* [[TMP88]], i64 4 +// CHECK1-NEXT: [[TMP89:%.*]] = bitcast i8* [[ADD_PTR55]] to %struct.BaseS1* +// CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* noundef [[TMP87]], %struct.BaseS1* noundef [[TMP89]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP85]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT56]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST53]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT57]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST52]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE58:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT56]], [[TMP83]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE58]], label [[OMP_ARRAYCPY_DONE59]], label [[OMP_ARRAYCPY_BODY51]] +// CHECK1: omp.arraycpy.done59: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP84:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[VLA16]], i64 [[TMP28]] -// CHECK1-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[VLA16]], [[TMP84]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE63:%.*]], label [[ARRAYDESTROY_BODY:%.*]] +// CHECK1-NEXT: [[TMP90:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[VLA13]], i64 [[TMP34]] +// CHECK1-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[VLA13]], [[TMP90]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE60:%.*]], label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP84]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP90]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[VLA16]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE63]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done63: -// CHECK1-NEXT: [[TMP85:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP85]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[VLA13]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE60]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done60: +// CHECK1-NEXT: [[TMP91:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP91]]) // CHECK1-NEXT: ret void // // @@ -1443,14 +1488,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARR:%.*]], [10 x [4 x %struct.S.0]]* noundef nonnull align 4 dereferenceable(480) [[ARRS:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARRS_ADDR:%.*]] = alloca [10 x [4 x %struct.S.0]]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1459,222 +1501,224 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARRS5:%.*]] = alloca [10 x [4 x %struct.S.0]], align 16 +// CHECK1-NEXT: [[ARRS:%.*]] = alloca [10 x [4 x %struct.S.0]], align 16 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store i32* [[ARR]], i32** [[ARR_ADDR]], align 8 -// CHECK1-NEXT: store [10 x [4 x %struct.S.0]]* [[ARRS]], [10 x [4 x %struct.S.0]]** [[ARRS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load [10 x [4 x %struct.S.0]]*, [10 x [4 x %struct.S.0]]** [[ARRS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load [10 x [4 x %struct.S.0]]*, [10 x [4 x %struct.S.0]]** [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP0]], [[TMP1]] -// CHECK1-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -// CHECK1-NEXT: [[TMP6:%.*]] = udiv exact i64 [[TMP5]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP7:%.*]] = call i8* @llvm.stacksave() -// CHECK1-NEXT: store i8* [[TMP7]], i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA3:%.*]] = alloca i32, i64 [[TMP6]], align 16 -// CHECK1-NEXT: store i64 [[TMP6]], i64* [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[VLA3]], i64 [[TMP6]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[VLA3]], [[TMP8]] +// CHECK1-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP2]], [[TMP4]] +// CHECK1-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 +// CHECK1-NEXT: [[TMP11:%.*]] = udiv exact i64 [[TMP10]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP12:%.*]] = call i8* @llvm.stacksave() +// CHECK1-NEXT: store i8* [[TMP12]], i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP11]], align 16 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[VLA]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[VLA]], [[TMP13]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i32* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i32* [ [[VLA3]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i32* [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i32* [ [[VLA]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT1:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: call void @.omp_initializer..11(i32* noundef [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32* noundef [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP8]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT1]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT1]], [[TMP13]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], [10 x [4 x %struct.S.0]]* [[ARRS5]], i32 0, i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [10 x [4 x %struct.S.0]]* [[TMP3]] to %struct.S.0* -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY6:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP10]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY6]], label [[OMP_ARRAYINIT_DONE14:%.*]], label [[OMP_ARRAYINIT_BODY7:%.*]] -// CHECK1: omp.arrayinit.body7: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST8:%.*]] = phi %struct.S.0* [ [[TMP9]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYINIT_BODY7]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST9:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT12:%.*]], [[OMP_ARRAYINIT_BODY7]] ] -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST9]] to i8* -// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP11]], i64 4 -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST8]] to i8* -// CHECK1-NEXT: [[ADD_PTR10:%.*]] = getelementptr inbounds i8, i8* [[TMP13]], i64 4 -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i8* [[ADD_PTR10]] to %struct.BaseS1* -// CHECK1-NEXT: call void @.omp_initializer..5(%struct.BaseS1* noundef [[TMP12]], %struct.BaseS1* noundef [[TMP14]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST8]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT12]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT12]], [[TMP10]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYINIT_DONE14]], label [[OMP_ARRAYINIT_BODY7]] -// CHECK1: omp.arrayinit.done14: -// CHECK1-NEXT: [[LHS_BEGIN:%.*]] = bitcast [10 x [4 x %struct.S.0]]* [[TMP3]] to %struct.S.0* -// CHECK1-NEXT: [[RHS_BEGIN:%.*]] = bitcast [10 x [4 x %struct.S.0]]* [[ARRS5]] to %struct.S.0* -// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP16]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 9 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], [10 x [4 x %struct.S.0]]* [[ARRS]], i32 0, i32 0, i32 0 +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast [10 x [4 x %struct.S.0]]* [[TMP8]] to %struct.S.0* +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY2:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP15]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY2]], label [[OMP_ARRAYINIT_DONE10:%.*]], label [[OMP_ARRAYINIT_BODY3:%.*]] +// CHECK1: omp.arrayinit.body3: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST4:%.*]] = phi %struct.S.0* [ [[TMP14]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT7:%.*]], [[OMP_ARRAYINIT_BODY3]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST5:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYINIT_BODY3]] ] +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST5]] to i8* +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP16]], i64 4 +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST4]] to i8* +// CHECK1-NEXT: [[ADD_PTR6:%.*]] = getelementptr inbounds i8, i8* [[TMP18]], i64 4 +// CHECK1-NEXT: [[TMP19:%.*]] = bitcast i8* [[ADD_PTR6]] to %struct.BaseS1* +// CHECK1-NEXT: call void @.omp_initializer..5(%struct.BaseS1* noundef [[TMP17]], %struct.BaseS1* noundef [[TMP19]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT7]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST4]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST5]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP15]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYINIT_DONE10]], label [[OMP_ARRAYINIT_BODY3]] +// CHECK1: omp.arrayinit.done10: +// CHECK1-NEXT: [[LHS_BEGIN:%.*]] = bitcast [10 x [4 x %struct.S.0]]* [[TMP8]] to %struct.S.0* +// CHECK1-NEXT: [[RHS_BEGIN:%.*]] = bitcast [10 x [4 x %struct.S.0]]* [[ARRS]] to %struct.S.0* +// CHECK1-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP21]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP15:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: br i1 [[CMP15]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP1]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[VLA3]], i64 [[TMP23]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX16]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP25]], 1 -// CHECK1-NEXT: store i32 [[INC]], i32* [[ARRAYIDX16]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = mul nsw i64 1, [[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i64 [[TMP28]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[ARRAYIDX12]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[INC]], i32* [[ARRAYIDX12]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP26]], 1 -// CHECK1-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP28]]) -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP30:%.*]] = bitcast i32* [[VLA3]] to i8* -// CHECK1-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP32:%.*]] = inttoptr i64 [[TMP6]] to i8* -// CHECK1-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP34:%.*]] = bitcast %struct.S.0* [[RHS_BEGIN]] to i8* -// CHECK1-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP38:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP36]], i32 2, i64 24, i8* [[TMP37]], void (i8*, i8*)* @.omp.reduction.reduction_func.14, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP38]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP33]]) +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP35:%.*]] = bitcast i32* [[VLA]] to i8* +// CHECK1-NEXT: store i8* [[TMP35]], i8** [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP37:%.*]] = inttoptr i64 [[TMP11]] to i8* +// CHECK1-NEXT: store i8* [[TMP37]], i8** [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP39:%.*]] = bitcast %struct.S.0* [[RHS_BEGIN]] to i8* +// CHECK1-NEXT: store i8* [[TMP39]], i8** [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP43:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP41]], i32 2, i64 24, i8* [[TMP42]], void (i8*, i8*)* @.omp.reduction.reduction_func.14, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP43]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr i32, i32* [[TMP2]], i64 [[TMP6]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[TMP2]], [[TMP39]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE22:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr i32, i32* [[TMP6]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[TMP6]], [[TMP44]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST18:%.*]] = phi i32* [ [[VLA3]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST19:%.*]] = phi i32* [ [[TMP2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: call void @.omp_combiner..10(i32* noundef [[OMP_ARRAYCPY_DESTELEMENTPAST19]], i32* noundef [[OMP_ARRAYCPY_SRCELEMENTPAST18]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST19]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST18]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP39]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done22: -// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[LHS_BEGIN]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY23:%.*]] = icmp eq %struct.S.0* [[LHS_BEGIN]], [[TMP40]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY23]], label [[OMP_ARRAYCPY_DONE32:%.*]], label [[OMP_ARRAYCPY_BODY24:%.*]] -// CHECK1: omp.arraycpy.body24: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST25:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[OMP_ARRAYCPY_DONE22]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT30:%.*]], [[OMP_ARRAYCPY_BODY24]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST26:%.*]] = phi %struct.S.0* [ [[LHS_BEGIN]], [[OMP_ARRAYCPY_DONE22]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT29:%.*]], [[OMP_ARRAYCPY_BODY24]] ] -// CHECK1-NEXT: [[TMP41:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST26]] to i8* -// CHECK1-NEXT: [[ADD_PTR27:%.*]] = getelementptr inbounds i8, i8* [[TMP41]], i64 4 -// CHECK1-NEXT: [[TMP42:%.*]] = bitcast i8* [[ADD_PTR27]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP43:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST25]] to i8* -// CHECK1-NEXT: [[ADD_PTR28:%.*]] = getelementptr inbounds i8, i8* [[TMP43]], i64 4 -// CHECK1-NEXT: [[TMP44:%.*]] = bitcast i8* [[ADD_PTR28]] to %struct.BaseS1* -// CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* noundef [[TMP42]], %struct.BaseS1* noundef [[TMP44]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT29]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT30]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST25]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE31:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT29]], [[TMP40]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_DONE32]], label [[OMP_ARRAYCPY_BODY24]] -// CHECK1: omp.arraycpy.done32: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP36]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST14:%.*]] = phi i32* [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi i32* [ [[TMP6]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: call void @.omp_combiner..10(i32* noundef [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32* noundef [[OMP_ARRAYCPY_SRCELEMENTPAST14]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST14]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP44]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done18: +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[LHS_BEGIN]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq %struct.S.0* [[LHS_BEGIN]], [[TMP45]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY19]], label [[OMP_ARRAYCPY_DONE28:%.*]], label [[OMP_ARRAYCPY_BODY20:%.*]] +// CHECK1: omp.arraycpy.body20: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST21:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[OMP_ARRAYCPY_DONE18]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT26:%.*]], [[OMP_ARRAYCPY_BODY20]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST22:%.*]] = phi %struct.S.0* [ [[LHS_BEGIN]], [[OMP_ARRAYCPY_DONE18]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY20]] ] +// CHECK1-NEXT: [[TMP46:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST22]] to i8* +// CHECK1-NEXT: [[ADD_PTR23:%.*]] = getelementptr inbounds i8, i8* [[TMP46]], i64 4 +// CHECK1-NEXT: [[TMP47:%.*]] = bitcast i8* [[ADD_PTR23]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP48:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST21]] to i8* +// CHECK1-NEXT: [[ADD_PTR24:%.*]] = getelementptr inbounds i8, i8* [[TMP48]], i64 4 +// CHECK1-NEXT: [[TMP49:%.*]] = bitcast i8* [[ADD_PTR24]] to %struct.BaseS1* +// CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* noundef [[TMP47]], %struct.BaseS1* noundef [[TMP49]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT25]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT26]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE27:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT25]], [[TMP45]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_DONE28]], label [[OMP_ARRAYCPY_BODY20]] +// CHECK1: omp.arraycpy.done28: +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP41]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr i32, i32* [[TMP2]], i64 [[TMP6]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY33:%.*]] = icmp eq i32* [[TMP2]], [[TMP45]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY33]], label [[OMP_ARRAYCPY_DONE40:%.*]], label [[OMP_ARRAYCPY_BODY34:%.*]] -// CHECK1: omp.arraycpy.body34: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST35:%.*]] = phi i32* [ [[VLA3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT38:%.*]], [[OMP_ARRAYCPY_BODY34]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST36:%.*]] = phi i32* [ [[TMP2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT37:%.*]], [[OMP_ARRAYCPY_BODY34]] ] -// CHECK1-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @.omp_combiner..10(i32* noundef [[OMP_ARRAYCPY_DESTELEMENTPAST36]], i32* noundef [[OMP_ARRAYCPY_SRCELEMENTPAST35]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT37]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST36]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT38]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST35]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE39:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT37]], [[TMP45]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE39]], label [[OMP_ARRAYCPY_DONE40]], label [[OMP_ARRAYCPY_BODY34]] -// CHECK1: omp.arraycpy.done40: -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[LHS_BEGIN]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY41:%.*]] = icmp eq %struct.S.0* [[LHS_BEGIN]], [[TMP48]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY41]], label [[OMP_ARRAYCPY_DONE50:%.*]], label [[OMP_ARRAYCPY_BODY42:%.*]] -// CHECK1: omp.arraycpy.body42: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST43:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[OMP_ARRAYCPY_DONE40]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT48:%.*]], [[OMP_ARRAYCPY_BODY42]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST44:%.*]] = phi %struct.S.0* [ [[LHS_BEGIN]], [[OMP_ARRAYCPY_DONE40]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT47:%.*]], [[OMP_ARRAYCPY_BODY42]] ] -// CHECK1-NEXT: [[TMP49:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[TMP49]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP50]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[TMP51:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST44]] to i8* -// CHECK1-NEXT: [[ADD_PTR45:%.*]] = getelementptr inbounds i8, i8* [[TMP51]], i64 4 -// CHECK1-NEXT: [[TMP52:%.*]] = bitcast i8* [[ADD_PTR45]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP53:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST43]] to i8* -// CHECK1-NEXT: [[ADD_PTR46:%.*]] = getelementptr inbounds i8, i8* [[TMP53]], i64 4 -// CHECK1-NEXT: [[TMP54:%.*]] = bitcast i8* [[ADD_PTR46]] to %struct.BaseS1* -// CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* noundef [[TMP52]], %struct.BaseS1* noundef [[TMP54]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP50]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT47]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST44]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT48]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST43]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE49:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT47]], [[TMP48]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE49]], label [[OMP_ARRAYCPY_DONE50]], label [[OMP_ARRAYCPY_BODY42]] -// CHECK1: omp.arraycpy.done50: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP36]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr i32, i32* [[TMP6]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY29:%.*]] = icmp eq i32* [[TMP6]], [[TMP50]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY29]], label [[OMP_ARRAYCPY_DONE36:%.*]], label [[OMP_ARRAYCPY_BODY30:%.*]] +// CHECK1: omp.arraycpy.body30: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST31:%.*]] = phi i32* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT34:%.*]], [[OMP_ARRAYCPY_BODY30]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST32:%.*]] = phi i32* [ [[TMP6]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT33:%.*]], [[OMP_ARRAYCPY_BODY30]] ] +// CHECK1-NEXT: [[TMP51:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[TMP51]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP52]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..10(i32* noundef [[OMP_ARRAYCPY_DESTELEMENTPAST32]], i32* noundef [[OMP_ARRAYCPY_SRCELEMENTPAST31]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP52]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT33]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST32]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT34]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST31]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE35:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT33]], [[TMP50]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_DONE36]], label [[OMP_ARRAYCPY_BODY30]] +// CHECK1: omp.arraycpy.done36: +// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[LHS_BEGIN]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY37:%.*]] = icmp eq %struct.S.0* [[LHS_BEGIN]], [[TMP53]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY37]], label [[OMP_ARRAYCPY_DONE46:%.*]], label [[OMP_ARRAYCPY_BODY38:%.*]] +// CHECK1: omp.arraycpy.body38: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST39:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[OMP_ARRAYCPY_DONE36]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT44:%.*]], [[OMP_ARRAYCPY_BODY38]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST40:%.*]] = phi %struct.S.0* [ [[LHS_BEGIN]], [[OMP_ARRAYCPY_DONE36]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT43:%.*]], [[OMP_ARRAYCPY_BODY38]] ] +// CHECK1-NEXT: [[TMP54:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, i32* [[TMP54]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP55]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP56:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST40]] to i8* +// CHECK1-NEXT: [[ADD_PTR41:%.*]] = getelementptr inbounds i8, i8* [[TMP56]], i64 4 +// CHECK1-NEXT: [[TMP57:%.*]] = bitcast i8* [[ADD_PTR41]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP58:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST39]] to i8* +// CHECK1-NEXT: [[ADD_PTR42:%.*]] = getelementptr inbounds i8, i8* [[TMP58]], i64 4 +// CHECK1-NEXT: [[TMP59:%.*]] = bitcast i8* [[ADD_PTR42]] to %struct.BaseS1* +// CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* noundef [[TMP57]], %struct.BaseS1* noundef [[TMP59]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP55]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT43]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST40]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT44]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST39]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE45:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT43]], [[TMP53]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE45]], label [[OMP_ARRAYCPY_DONE46]], label [[OMP_ARRAYCPY_BODY38]] +// CHECK1: omp.arraycpy.done46: +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP41]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN51:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], [10 x [4 x %struct.S.0]]* [[ARRS5]], i32 0, i32 0, i32 0 -// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN51]], i64 40 +// CHECK1-NEXT: [[ARRAY_BEGIN47:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], [10 x [4 x %struct.S.0]]* [[ARRS]], i32 0, i32 0, i32 0 +// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN47]], i64 40 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP55]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP60]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN51]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE52:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done52: -// CHECK1-NEXT: [[TMP56:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP56]]) -// CHECK1-NEXT: [[TMP57:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP58:%.*]] = load i32, i32* [[TMP57]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP58]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN47]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE48:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done48: +// CHECK1-NEXT: [[TMP61:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP61]]) +// CHECK1-NEXT: [[TMP62:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = load i32, i32* [[TMP62]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP63]]) // CHECK1-NEXT: ret void // // @@ -1738,11 +1782,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S.0*** noundef nonnull align 8 dereferenceable(8) [[VAR2:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VAR2_ADDR:%.*]] = alloca %struct.S.0***, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1757,176 +1801,178 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0*** [[VAR2]], %struct.S.0**** [[VAR2_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S.0***, %struct.S.0**** [[VAR2_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0***, %struct.S.0**** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S.0**, %struct.S.0*** [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds %struct.S.0*, %struct.S.0** [[TMP1]], i64 0 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[TMP2]], i64 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S.0**, %struct.S.0*** [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds %struct.S.0*, %struct.S.0** [[TMP3]], i64 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[ARRAYIDX2]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[TMP4]], i64 6 -// CHECK1-NEXT: [[TMP5:%.*]] = ptrtoint %struct.S.0* [[ARRAYIDX3]] to i64 -// CHECK1-NEXT: [[TMP6:%.*]] = ptrtoint %struct.S.0* [[ARRAYIDX1]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = sub i64 [[TMP5]], [[TMP6]] -// CHECK1-NEXT: [[TMP8:%.*]] = sdiv exact i64 [[TMP7]], ptrtoint (%struct.S.0* getelementptr ([[STRUCT_S_0]], %struct.S.0* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP9:%.*]] = add nuw i64 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], ptrtoint (%struct.S.0* getelementptr ([[STRUCT_S_0]], %struct.S.0* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = call i8* @llvm.stacksave() -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca [[STRUCT_S_0]], i64 [[TMP9]], align 16 -// CHECK1-NEXT: store i64 [[TMP9]], i64* [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[VLA]], i64 [[TMP9]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[VLA]], [[TMP12]] +// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S.0**, %struct.S.0*** [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds %struct.S.0*, %struct.S.0** [[TMP3]], i64 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[TMP4]], i64 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load %struct.S.0**, %struct.S.0*** [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds %struct.S.0*, %struct.S.0** [[TMP5]], i64 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.S.0*, %struct.S.0** [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[TMP6]], i64 6 +// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint %struct.S.0* [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint %struct.S.0* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (%struct.S.0* getelementptr ([[STRUCT_S_0]], %struct.S.0* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP11:%.*]] = add nuw i64 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], ptrtoint (%struct.S.0* getelementptr ([[STRUCT_S_0]], %struct.S.0* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP13:%.*]] = call i8* @llvm.stacksave() +// CHECK1-NEXT: store i8* [[TMP13]], i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca [[STRUCT_S_0]], i64 [[TMP11]], align 16 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[VLA]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[VLA]], [[TMP14]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAYIDX1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[VLA]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT5:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP13]], i64 4 -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK1-NEXT: [[ADD_PTR4:%.*]] = getelementptr inbounds i8, i8* [[TMP15]], i64 4 -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast i8* [[ADD_PTR4]] to %struct.BaseS1* -// CHECK1-NEXT: call void @.omp_initializer..5(%struct.BaseS1* noundef [[TMP14]], %struct.BaseS1* noundef [[TMP16]]) +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP15]], i64 4 +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK1-NEXT: [[ADD_PTR4:%.*]] = getelementptr inbounds i8, i8* [[TMP17]], i64 4 +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast i8* [[ADD_PTR4]] to %struct.BaseS1* +// CHECK1-NEXT: call void @.omp_initializer..5(%struct.BaseS1* noundef [[TMP16]], %struct.BaseS1* noundef [[TMP18]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT5]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP12]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP14]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP17:%.*]] = load %struct.S.0**, %struct.S.0*** [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = ptrtoint %struct.S.0* [[TMP18]] to i64 -// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint %struct.S.0* [[ARRAYIDX1]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = sub i64 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: [[TMP22:%.*]] = sdiv exact i64 [[TMP21]], ptrtoint (%struct.S.0* getelementptr ([[STRUCT_S_0]], %struct.S.0* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[VLA]], i64 [[TMP22]] +// CHECK1-NEXT: [[TMP19:%.*]] = load %struct.S.0**, %struct.S.0*** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint %struct.S.0* [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP22:%.*]] = ptrtoint %struct.S.0* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP23:%.*]] = sub i64 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP24:%.*]] = sdiv exact i64 [[TMP23]], ptrtoint (%struct.S.0* getelementptr ([[STRUCT_S_0]], %struct.S.0* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[VLA]], i64 [[TMP24]] // CHECK1-NEXT: store %struct.S.0** [[_TMP7]], %struct.S.0*** [[_TMP6]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP23]], %struct.S.0** [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP26]], 9 +// CHECK1-NEXT: store %struct.S.0* [[TMP25]], %struct.S.0** [[_TMP7]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP27]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP28]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP29]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP30]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] // CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP34]], 1 // CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP34]]) -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP36:%.*]] = bitcast %struct.S.0* [[VLA]] to i8* -// CHECK1-NEXT: store i8* [[TMP36]], i8** [[TMP35]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP38:%.*]] = inttoptr i64 [[TMP9]] to i8* +// CHECK1-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP36]]) +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP38:%.*]] = bitcast %struct.S.0* [[VLA]] to i8* // CHECK1-NEXT: store i8* [[TMP38]], i8** [[TMP37]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 -// CHECK1-NEXT: [[TMP41:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP42:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP40]], i32 1, i64 16, i8* [[TMP41]], void (i8*, i8*)* @.omp.reduction.reduction_func.16, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP42]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP40:%.*]] = inttoptr i64 [[TMP11]] to i8* +// CHECK1-NEXT: store i8* [[TMP40]], i8** [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[TMP41]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP44:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP42]], i32 1, i64 16, i8* [[TMP43]], void (i8*, i8*)* @.omp.reduction.reduction_func.16, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP44]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAYIDX1]], i64 [[TMP9]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAYIDX1]], [[TMP43]] +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAYIDX1]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAYIDX1]], [[TMP45]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE16:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST10:%.*]] = phi %struct.S.0* [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST11:%.*]] = phi %struct.S.0* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT14:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP44:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST11]] to i8* -// CHECK1-NEXT: [[ADD_PTR12:%.*]] = getelementptr inbounds i8, i8* [[TMP44]], i64 4 -// CHECK1-NEXT: [[TMP45:%.*]] = bitcast i8* [[ADD_PTR12]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP46:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST10]] to i8* -// CHECK1-NEXT: [[ADD_PTR13:%.*]] = getelementptr inbounds i8, i8* [[TMP46]], i64 4 -// CHECK1-NEXT: [[TMP47:%.*]] = bitcast i8* [[ADD_PTR13]] to %struct.BaseS1* -// CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* noundef [[TMP45]], %struct.BaseS1* noundef [[TMP47]]) +// CHECK1-NEXT: [[TMP46:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST11]] to i8* +// CHECK1-NEXT: [[ADD_PTR12:%.*]] = getelementptr inbounds i8, i8* [[TMP46]], i64 4 +// CHECK1-NEXT: [[TMP47:%.*]] = bitcast i8* [[ADD_PTR12]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP48:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST10]] to i8* +// CHECK1-NEXT: [[ADD_PTR13:%.*]] = getelementptr inbounds i8, i8* [[TMP48]], i64 4 +// CHECK1-NEXT: [[TMP49:%.*]] = bitcast i8* [[ADD_PTR13]] to %struct.BaseS1* +// CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* noundef [[TMP47]], %struct.BaseS1* noundef [[TMP49]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT14]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST10]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE15:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT14]], [[TMP43]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE15:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT14]], [[TMP45]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE15]], label [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done16: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP40]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP42]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAYIDX1]], i64 [[TMP9]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY17:%.*]] = icmp eq %struct.S.0* [[ARRAYIDX1]], [[TMP48]] +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAYIDX1]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY17:%.*]] = icmp eq %struct.S.0* [[ARRAYIDX1]], [[TMP50]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY17]], label [[OMP_ARRAYCPY_DONE26:%.*]], label [[OMP_ARRAYCPY_BODY18:%.*]] // CHECK1: omp.arraycpy.body18: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST19:%.*]] = phi %struct.S.0* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY18]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST20:%.*]] = phi %struct.S.0* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY18]] ] -// CHECK1-NEXT: [[TMP49:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[TMP49]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP50]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[TMP51:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST20]] to i8* -// CHECK1-NEXT: [[ADD_PTR21:%.*]] = getelementptr inbounds i8, i8* [[TMP51]], i64 4 -// CHECK1-NEXT: [[TMP52:%.*]] = bitcast i8* [[ADD_PTR21]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP53:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST19]] to i8* -// CHECK1-NEXT: [[ADD_PTR22:%.*]] = getelementptr inbounds i8, i8* [[TMP53]], i64 4 -// CHECK1-NEXT: [[TMP54:%.*]] = bitcast i8* [[ADD_PTR22]] to %struct.BaseS1* -// CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* noundef [[TMP52]], %struct.BaseS1* noundef [[TMP54]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP50]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP51:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[TMP51]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP52]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP53:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST20]] to i8* +// CHECK1-NEXT: [[ADD_PTR21:%.*]] = getelementptr inbounds i8, i8* [[TMP53]], i64 4 +// CHECK1-NEXT: [[TMP54:%.*]] = bitcast i8* [[ADD_PTR21]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP55:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST19]] to i8* +// CHECK1-NEXT: [[ADD_PTR22:%.*]] = getelementptr inbounds i8, i8* [[TMP55]], i64 4 +// CHECK1-NEXT: [[TMP56:%.*]] = bitcast i8* [[ADD_PTR22]] to %struct.BaseS1* +// CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* noundef [[TMP54]], %struct.BaseS1* noundef [[TMP56]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP52]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT23]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST20]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT24]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST19]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE25:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP48]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE25:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP50]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_BODY18]] // CHECK1: omp.arraycpy.done26: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP40]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP42]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[VLA]], i64 [[TMP9]] -// CHECK1-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[VLA]], [[TMP55]] +// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[VLA]], i64 [[TMP11]] +// CHECK1-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[VLA]], [[TMP57]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE27:%.*]], label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP55]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP57]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[VLA]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE27]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done27: -// CHECK1-NEXT: [[TMP56:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP56]]) -// CHECK1-NEXT: [[TMP57:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP58:%.*]] = load i32, i32* [[TMP57]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP58]]) +// CHECK1-NEXT: [[TMP58:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP58]]) +// CHECK1-NEXT: [[TMP59:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load i32, i32* [[TMP59]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP60]]) // CHECK1-NEXT: ret void // // @@ -1972,172 +2018,174 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..17 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [5 x %struct.S.0]* noundef nonnull align 4 dereferenceable(60) [[VVAR2:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VVAR2_ADDR:%.*]] = alloca [5 x %struct.S.0]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VVAR22:%.*]] = alloca [5 x %struct.S.0], align 16 +// CHECK1-NEXT: [[VVAR2:%.*]] = alloca [5 x %struct.S.0], align 16 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [5 x %struct.S.0]* [[VVAR2]], [5 x %struct.S.0]** [[VVAR2_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [5 x %struct.S.0]*, [5 x %struct.S.0]** [[VVAR2_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [5 x %struct.S.0]*, [5 x %struct.S.0]** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x %struct.S.0], [5 x %struct.S.0]* [[TMP0]], i64 0, i64 0 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [5 x %struct.S.0], [5 x %struct.S.0]* [[TMP0]], i64 0, i64 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S.0], [5 x %struct.S.0]* [[VVAR22]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 5 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP1]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x %struct.S.0], [5 x %struct.S.0]* [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [5 x %struct.S.0], [5 x %struct.S.0]* [[TMP2]], i64 0, i64 4 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S.0], [5 x %struct.S.0]* [[VVAR2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 5 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP3]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// CHECK1-NEXT: [[TMP2:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 4 -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK1-NEXT: [[ADD_PTR3:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 4 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[ADD_PTR3]] to %struct.BaseS1* -// CHECK1-NEXT: call void @.omp_initializer..5(%struct.BaseS1* noundef [[TMP3]], %struct.BaseS1* noundef [[TMP5]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT3:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 4 +// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP6:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK1-NEXT: [[ADD_PTR2:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 4 +// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8* [[ADD_PTR2]] to %struct.BaseS1* +// CHECK1-NEXT: call void @.omp_initializer..5(%struct.BaseS1* noundef [[TMP5]], %struct.BaseS1* noundef [[TMP7]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP1]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT3]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT3]], [[TMP3]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast [5 x %struct.S.0]* [[TMP0]] to %struct.S.0* -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint %struct.S.0* [[TMP6]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint %struct.S.0* [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (%struct.S.0* getelementptr ([[STRUCT_S_0]], %struct.S.0* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [5 x %struct.S.0]* [[VVAR22]] to %struct.S.0* -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[TMP11]], i64 [[TMP10]] -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[TMP12]] to [5 x %struct.S.0]* -// CHECK1-NEXT: [[RHS_BEGIN:%.*]] = bitcast [5 x %struct.S.0]* [[VVAR22]] to %struct.S.0* -// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP15]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 9 +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast [5 x %struct.S.0]* [[TMP2]] to %struct.S.0* +// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint %struct.S.0* [[TMP8]] to i64 +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint %struct.S.0* [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP12:%.*]] = sdiv exact i64 [[TMP11]], ptrtoint (%struct.S.0* getelementptr ([[STRUCT_S_0]], %struct.S.0* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast [5 x %struct.S.0]* [[VVAR2]] to %struct.S.0* +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[TMP13]], i64 [[TMP12]] +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[TMP14]] to [5 x %struct.S.0]* +// CHECK1-NEXT: [[RHS_BEGIN:%.*]] = bitcast [5 x %struct.S.0]* [[VVAR2]] to %struct.S.0* +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP26:%.*]] = bitcast %struct.S.0* [[RHS_BEGIN]] to i8* -// CHECK1-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP28]], i32 1, i64 8, i8* [[TMP29]], void (i8*, i8*)* @.omp.reduction.reduction_func.18, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP30]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]]) +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP28:%.*]] = bitcast %struct.S.0* [[RHS_BEGIN]] to i8* +// CHECK1-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], i32 1, i64 8, i8* [[TMP31]], void (i8*, i8*)* @.omp.reduction.reduction_func.18, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP32]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAYIDX]], i64 5 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAYIDX]], [[TMP31]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAYIDX]], i64 5 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAYIDX]], [[TMP33]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST7:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST8]] to i8* -// CHECK1-NEXT: [[ADD_PTR9:%.*]] = getelementptr inbounds i8, i8* [[TMP32]], i64 4 -// CHECK1-NEXT: [[TMP33:%.*]] = bitcast i8* [[ADD_PTR9]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP34:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST7]] to i8* -// CHECK1-NEXT: [[ADD_PTR10:%.*]] = getelementptr inbounds i8, i8* [[TMP34]], i64 4 -// CHECK1-NEXT: [[TMP35:%.*]] = bitcast i8* [[ADD_PTR10]] to %struct.BaseS1* -// CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* noundef [[TMP33]], %struct.BaseS1* noundef [[TMP35]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST7]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE12:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP31]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done13: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP28]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST6:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST7:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST7]] to i8* +// CHECK1-NEXT: [[ADD_PTR8:%.*]] = getelementptr inbounds i8, i8* [[TMP34]], i64 4 +// CHECK1-NEXT: [[TMP35:%.*]] = bitcast i8* [[ADD_PTR8]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP36:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST6]] to i8* +// CHECK1-NEXT: [[ADD_PTR9:%.*]] = getelementptr inbounds i8, i8* [[TMP36]], i64 4 +// CHECK1-NEXT: [[TMP37:%.*]] = bitcast i8* [[ADD_PTR9]] to %struct.BaseS1* +// CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* noundef [[TMP35]], %struct.BaseS1* noundef [[TMP37]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT10]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST7]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST6]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT10]], [[TMP33]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done12: +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAYIDX]], i64 5 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY14:%.*]] = icmp eq %struct.S.0* [[ARRAYIDX]], [[TMP36]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY14]], label [[OMP_ARRAYCPY_DONE23:%.*]], label [[OMP_ARRAYCPY_BODY15:%.*]] -// CHECK1: omp.arraycpy.body15: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST16:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT21:%.*]], [[OMP_ARRAYCPY_BODY15]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST17:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY15]] ] -// CHECK1-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[TMP39:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST17]] to i8* -// CHECK1-NEXT: [[ADD_PTR18:%.*]] = getelementptr inbounds i8, i8* [[TMP39]], i64 4 -// CHECK1-NEXT: [[TMP40:%.*]] = bitcast i8* [[ADD_PTR18]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP41:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST16]] to i8* -// CHECK1-NEXT: [[ADD_PTR19:%.*]] = getelementptr inbounds i8, i8* [[TMP41]], i64 4 -// CHECK1-NEXT: [[TMP42:%.*]] = bitcast i8* [[ADD_PTR19]] to %struct.BaseS1* -// CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* noundef [[TMP40]], %struct.BaseS1* noundef [[TMP42]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST17]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT21]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST16]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE22:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP36]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_DONE23]], label [[OMP_ARRAYCPY_BODY15]] -// CHECK1: omp.arraycpy.done23: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP28]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAYIDX]], i64 5 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY13:%.*]] = icmp eq %struct.S.0* [[ARRAYIDX]], [[TMP38]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY13]], label [[OMP_ARRAYCPY_DONE22:%.*]], label [[OMP_ARRAYCPY_BODY14:%.*]] +// CHECK1: omp.arraycpy.body14: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST15:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY14]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY14]] ] +// CHECK1-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP40]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP41:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST16]] to i8* +// CHECK1-NEXT: [[ADD_PTR17:%.*]] = getelementptr inbounds i8, i8* [[TMP41]], i64 4 +// CHECK1-NEXT: [[TMP42:%.*]] = bitcast i8* [[ADD_PTR17]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP43:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST15]] to i8* +// CHECK1-NEXT: [[ADD_PTR18:%.*]] = getelementptr inbounds i8, i8* [[TMP43]], i64 4 +// CHECK1-NEXT: [[TMP44:%.*]] = bitcast i8* [[ADD_PTR18]] to %struct.BaseS1* +// CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* noundef [[TMP42]], %struct.BaseS1* noundef [[TMP44]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP40]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT20]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST15]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP38]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_BODY14]] +// CHECK1: omp.arraycpy.done22: +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN24:%.*]] = getelementptr inbounds [5 x %struct.S.0], [5 x %struct.S.0]* [[VVAR22]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN24]], i64 5 +// CHECK1-NEXT: [[ARRAY_BEGIN23:%.*]] = getelementptr inbounds [5 x %struct.S.0], [5 x %struct.S.0]* [[VVAR2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN23]], i64 5 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP43]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP45]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN24]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE25:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done25: -// CHECK1-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP45]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN23]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE24:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done24: +// CHECK1-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP47]]) // CHECK1-NEXT: ret void // // @@ -2180,11 +2228,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [4 x %struct.S.0]* noundef nonnull align 4 dereferenceable(48) [[VAR3:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VAR3_ADDR:%.*]] = alloca [4 x %struct.S.0]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca [4 x %struct.S.0]*, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca [4 x %struct.S.0]*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2193,169 +2241,171 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VAR34:%.*]] = alloca [2 x %struct.S.0], align 16 -// CHECK1-NEXT: [[_TMP7:%.*]] = alloca [4 x %struct.S.0]*, align 8 +// CHECK1-NEXT: [[VAR3:%.*]] = alloca [2 x %struct.S.0], align 16 +// CHECK1-NEXT: [[_TMP6:%.*]] = alloca [4 x %struct.S.0]*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [4 x %struct.S.0]* [[VAR3]], [4 x %struct.S.0]** [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [4 x %struct.S.0]*, [4 x %struct.S.0]** [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: store [4 x %struct.S.0]* [[TMP0]], [4 x %struct.S.0]** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load [4 x %struct.S.0]*, [4 x %struct.S.0]** [[TMP]], align 8 -// CHECK1-NEXT: store [4 x %struct.S.0]* [[TMP1]], [4 x %struct.S.0]** [[_TMP1]], align 8 +// CHECK1-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [4 x %struct.S.0]*, [4 x %struct.S.0]** [[TMP1]], align 8 +// CHECK1-NEXT: store [4 x %struct.S.0]* [[TMP2]], [4 x %struct.S.0]** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load [4 x %struct.S.0]*, [4 x %struct.S.0]** [[TMP]], align 8 +// CHECK1-NEXT: store [4 x %struct.S.0]* [[TMP3]], [4 x %struct.S.0]** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load [4 x %struct.S.0]*, [4 x %struct.S.0]** [[_TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x %struct.S.0], [4 x %struct.S.0]* [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load [4 x %struct.S.0]*, [4 x %struct.S.0]** [[_TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x %struct.S.0], [4 x %struct.S.0]* [[TMP3]], i64 0, i64 2 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[VAR34]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP4]] +// CHECK1-NEXT: [[TMP4:%.*]] = load [4 x %struct.S.0]*, [4 x %struct.S.0]** [[_TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x %struct.S.0], [4 x %struct.S.0]* [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load [4 x %struct.S.0]*, [4 x %struct.S.0]** [[_TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x %struct.S.0], [4 x %struct.S.0]* [[TMP5]], i64 0, i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[VAR3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT6:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 4 -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK1-NEXT: [[ADD_PTR5:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 4 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[ADD_PTR5]] to %struct.BaseS1* -// CHECK1-NEXT: call void @.omp_initializer..5(%struct.BaseS1* noundef [[TMP6]], %struct.BaseS1* noundef [[TMP8]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT5:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[TMP7:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 4 +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK1-NEXT: [[ADD_PTR4:%.*]] = getelementptr inbounds i8, i8* [[TMP9]], i64 4 +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8* [[ADD_PTR4]] to %struct.BaseS1* +// CHECK1-NEXT: call void @.omp_initializer..5(%struct.BaseS1* noundef [[TMP8]], %struct.BaseS1* noundef [[TMP10]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT6]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT6]], [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT5]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP9:%.*]] = load [4 x %struct.S.0]*, [4 x %struct.S.0]** [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast [4 x %struct.S.0]* [[TMP9]] to %struct.S.0* -// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint %struct.S.0* [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = ptrtoint %struct.S.0* [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP13:%.*]] = sub i64 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: [[TMP14:%.*]] = sdiv exact i64 [[TMP13]], ptrtoint (%struct.S.0* getelementptr ([[STRUCT_S_0]], %struct.S.0* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast [2 x %struct.S.0]* [[VAR34]] to %struct.S.0* -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[TMP15]], i64 [[TMP14]] -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast %struct.S.0* [[TMP16]] to [4 x %struct.S.0]* -// CHECK1-NEXT: store [4 x %struct.S.0]* [[TMP17]], [4 x %struct.S.0]** [[_TMP7]], align 8 -// CHECK1-NEXT: [[RHS_BEGIN:%.*]] = bitcast [2 x %struct.S.0]* [[VAR34]] to %struct.S.0* -// CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP19]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 9 +// CHECK1-NEXT: [[TMP11:%.*]] = load [4 x %struct.S.0]*, [4 x %struct.S.0]** [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast [4 x %struct.S.0]* [[TMP11]] to %struct.S.0* +// CHECK1-NEXT: [[TMP13:%.*]] = ptrtoint %struct.S.0* [[TMP12]] to i64 +// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint %struct.S.0* [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = sub i64 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP16:%.*]] = sdiv exact i64 [[TMP15]], ptrtoint (%struct.S.0* getelementptr ([[STRUCT_S_0]], %struct.S.0* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast [2 x %struct.S.0]* [[VAR3]] to %struct.S.0* +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[TMP17]], i64 [[TMP16]] +// CHECK1-NEXT: [[TMP19:%.*]] = bitcast %struct.S.0* [[TMP18]] to [4 x %struct.S.0]* +// CHECK1-NEXT: store [4 x %struct.S.0]* [[TMP19]], [4 x %struct.S.0]** [[_TMP6]], align 8 +// CHECK1-NEXT: [[RHS_BEGIN:%.*]] = bitcast [2 x %struct.S.0]* [[VAR3]] to %struct.S.0* +// CHECK1-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP21]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP28]]) -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP30:%.*]] = bitcast %struct.S.0* [[RHS_BEGIN]] to i8* -// CHECK1-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP34:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP32]], i32 1, i64 8, i8* [[TMP33]], void (i8*, i8*)* @.omp.reduction.reduction_func.20, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP34]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP30]]) +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[RHS_BEGIN]] to i8* +// CHECK1-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP36:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP34]], i32 1, i64 8, i8* [[TMP35]], void (i8*, i8*)* @.omp.reduction.reduction_func.20, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP36]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAYIDX]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAYIDX]], [[TMP35]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE16:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAYIDX]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAYIDX]], [[TMP37]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE15:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST10:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST11:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT14:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP36:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST11]] to i8* -// CHECK1-NEXT: [[ADD_PTR12:%.*]] = getelementptr inbounds i8, i8* [[TMP36]], i64 4 -// CHECK1-NEXT: [[TMP37:%.*]] = bitcast i8* [[ADD_PTR12]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP38:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST10]] to i8* -// CHECK1-NEXT: [[ADD_PTR13:%.*]] = getelementptr inbounds i8, i8* [[TMP38]], i64 4 -// CHECK1-NEXT: [[TMP39:%.*]] = bitcast i8* [[ADD_PTR13]] to %struct.BaseS1* -// CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* noundef [[TMP37]], %struct.BaseS1* noundef [[TMP39]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT14]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST10]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE15:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT14]], [[TMP35]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE15]], label [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done16: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP32]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT13:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP38:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST10]] to i8* +// CHECK1-NEXT: [[ADD_PTR11:%.*]] = getelementptr inbounds i8, i8* [[TMP38]], i64 4 +// CHECK1-NEXT: [[TMP39:%.*]] = bitcast i8* [[ADD_PTR11]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP40:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST9]] to i8* +// CHECK1-NEXT: [[ADD_PTR12:%.*]] = getelementptr inbounds i8, i8* [[TMP40]], i64 4 +// CHECK1-NEXT: [[TMP41:%.*]] = bitcast i8* [[ADD_PTR12]] to %struct.BaseS1* +// CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* noundef [[TMP39]], %struct.BaseS1* noundef [[TMP41]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT13]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE14:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT13]], [[TMP37]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_DONE15]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done15: +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP34]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAYIDX]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY17:%.*]] = icmp eq %struct.S.0* [[ARRAYIDX]], [[TMP40]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY17]], label [[OMP_ARRAYCPY_DONE26:%.*]], label [[OMP_ARRAYCPY_BODY18:%.*]] -// CHECK1: omp.arraycpy.body18: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST19:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY18]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST20:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY18]] ] -// CHECK1-NEXT: [[TMP41:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[TMP41]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP42]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[TMP43:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST20]] to i8* -// CHECK1-NEXT: [[ADD_PTR21:%.*]] = getelementptr inbounds i8, i8* [[TMP43]], i64 4 -// CHECK1-NEXT: [[TMP44:%.*]] = bitcast i8* [[ADD_PTR21]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP45:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST19]] to i8* -// CHECK1-NEXT: [[ADD_PTR22:%.*]] = getelementptr inbounds i8, i8* [[TMP45]], i64 4 -// CHECK1-NEXT: [[TMP46:%.*]] = bitcast i8* [[ADD_PTR22]] to %struct.BaseS1* -// CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* noundef [[TMP44]], %struct.BaseS1* noundef [[TMP46]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP42]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT23]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST20]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT24]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST19]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE25:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP40]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_BODY18]] -// CHECK1: omp.arraycpy.done26: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP32]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAYIDX]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY16:%.*]] = icmp eq %struct.S.0* [[ARRAYIDX]], [[TMP42]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY16]], label [[OMP_ARRAYCPY_DONE25:%.*]], label [[OMP_ARRAYCPY_BODY17:%.*]] +// CHECK1: omp.arraycpy.body17: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST18:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY17]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST19:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT22:%.*]], [[OMP_ARRAYCPY_BODY17]] ] +// CHECK1-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP44]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP45:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST19]] to i8* +// CHECK1-NEXT: [[ADD_PTR20:%.*]] = getelementptr inbounds i8, i8* [[TMP45]], i64 4 +// CHECK1-NEXT: [[TMP46:%.*]] = bitcast i8* [[ADD_PTR20]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP47:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST18]] to i8* +// CHECK1-NEXT: [[ADD_PTR21:%.*]] = getelementptr inbounds i8, i8* [[TMP47]], i64 4 +// CHECK1-NEXT: [[TMP48:%.*]] = bitcast i8* [[ADD_PTR21]] to %struct.BaseS1* +// CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* noundef [[TMP46]], %struct.BaseS1* noundef [[TMP48]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP44]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT22]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST19]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT23]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST18]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE24:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT22]], [[TMP42]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE24]], label [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_BODY17]] +// CHECK1: omp.arraycpy.done25: +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP34]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN27:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[VAR34]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN27]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN26:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[VAR3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN26]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP47]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP49]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN27]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE28:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done28: -// CHECK1-NEXT: [[TMP48:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[TMP48]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP49]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN26]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE27:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done27: +// CHECK1-NEXT: [[TMP50:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, i32* [[TMP50]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP51]]) // CHECK1-NEXT: ret void // // @@ -2398,11 +2448,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..21 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [4 x %struct.S.0]* noundef nonnull align 4 dereferenceable(48) [[VAR3:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VAR3_ADDR:%.*]] = alloca [4 x %struct.S.0]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca [4 x %struct.S.0]*, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca [4 x %struct.S.0]*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2416,149 +2466,151 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [4 x %struct.S.0]* [[VAR3]], [4 x %struct.S.0]** [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [4 x %struct.S.0]*, [4 x %struct.S.0]** [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: store [4 x %struct.S.0]* [[TMP0]], [4 x %struct.S.0]** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load [4 x %struct.S.0]*, [4 x %struct.S.0]** [[TMP]], align 8 -// CHECK1-NEXT: store [4 x %struct.S.0]* [[TMP1]], [4 x %struct.S.0]** [[_TMP1]], align 8 +// CHECK1-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [4 x %struct.S.0]*, [4 x %struct.S.0]** [[TMP1]], align 8 +// CHECK1-NEXT: store [4 x %struct.S.0]* [[TMP2]], [4 x %struct.S.0]** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load [4 x %struct.S.0]*, [4 x %struct.S.0]** [[TMP]], align 8 +// CHECK1-NEXT: store [4 x %struct.S.0]* [[TMP3]], [4 x %struct.S.0]** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load [4 x %struct.S.0]*, [4 x %struct.S.0]** [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: [[DOTVAR3__VOID_ADDR:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP4]], i64 48, i8* inttoptr (i64 6 to i8*)) +// CHECK1-NEXT: [[TMP4:%.*]] = load [4 x %struct.S.0]*, [4 x %struct.S.0]** [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK1-NEXT: [[DOTVAR3__VOID_ADDR:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP6]], i64 48, i8* inttoptr (i64 6 to i8*)) // CHECK1-NEXT: [[DOTVAR3__ADDR:%.*]] = bitcast i8* [[DOTVAR3__VOID_ADDR]] to [4 x %struct.S.0]* // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [4 x %struct.S.0], [4 x %struct.S.0]* [[DOTVAR3__ADDR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [4 x %struct.S.0]* [[TMP2]] to %struct.S.0* -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 4 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP6]] +// CHECK1-NEXT: [[TMP7:%.*]] = bitcast [4 x %struct.S.0]* [[TMP4]] to %struct.S.0* +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 4 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 4 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK1-NEXT: [[ADD_PTR3:%.*]] = getelementptr inbounds i8, i8* [[TMP9]], i64 4 -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8* [[ADD_PTR3]] to %struct.BaseS1* -// CHECK1-NEXT: call void @.omp_initializer..5(%struct.BaseS1* noundef [[TMP8]], %struct.BaseS1* noundef [[TMP10]]) +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP9]], i64 4 +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK1-NEXT: [[ADD_PTR3:%.*]] = getelementptr inbounds i8, i8* [[TMP11]], i64 4 +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i8* [[ADD_PTR3]] to %struct.BaseS1* +// CHECK1-NEXT: call void @.omp_initializer..5(%struct.BaseS1* noundef [[TMP10]], %struct.BaseS1* noundef [[TMP12]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP6]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: // CHECK1-NEXT: store [4 x %struct.S.0]* [[DOTVAR3__ADDR]], [4 x %struct.S.0]** [[_TMP5]], align 8 -// CHECK1-NEXT: [[LHS_BEGIN:%.*]] = bitcast [4 x %struct.S.0]* [[TMP2]] to %struct.S.0* +// CHECK1-NEXT: [[LHS_BEGIN:%.*]] = bitcast [4 x %struct.S.0]* [[TMP4]] to %struct.S.0* // CHECK1-NEXT: [[RHS_BEGIN:%.*]] = bitcast [4 x %struct.S.0]* [[DOTVAR3__ADDR]] to %struct.S.0* -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP19:%.*]] = bitcast %struct.S.0* [[RHS_BEGIN]] to i8* -// CHECK1-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP20]], void (i8*, i8*)* @.omp.reduction.reduction_func.22, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP21:%.*]] = bitcast %struct.S.0* [[RHS_BEGIN]] to i8* +// CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 1, i64 8, i8* [[TMP22]], void (i8*, i8*)* @.omp.reduction.reduction_func.22, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[LHS_BEGIN]], i64 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[LHS_BEGIN]], [[TMP22]] +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[LHS_BEGIN]], i64 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[LHS_BEGIN]], [[TMP24]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST8:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST9:%.*]] = phi %struct.S.0* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST9]] to i8* -// CHECK1-NEXT: [[ADD_PTR10:%.*]] = getelementptr inbounds i8, i8* [[TMP23]], i64 4 -// CHECK1-NEXT: [[TMP24:%.*]] = bitcast i8* [[ADD_PTR10]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP25:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST8]] to i8* -// CHECK1-NEXT: [[ADD_PTR11:%.*]] = getelementptr inbounds i8, i8* [[TMP25]], i64 4 -// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i8* [[ADD_PTR11]] to %struct.BaseS1* -// CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* noundef [[TMP24]], %struct.BaseS1* noundef [[TMP26]]) +// CHECK1-NEXT: [[TMP25:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST9]] to i8* +// CHECK1-NEXT: [[ADD_PTR10:%.*]] = getelementptr inbounds i8, i8* [[TMP25]], i64 4 +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i8* [[ADD_PTR10]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP27:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST8]] to i8* +// CHECK1-NEXT: [[ADD_PTR11:%.*]] = getelementptr inbounds i8, i8* [[TMP27]], i64 4 +// CHECK1-NEXT: [[TMP28:%.*]] = bitcast i8* [[ADD_PTR11]] to %struct.BaseS1* +// CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* noundef [[TMP26]], %struct.BaseS1* noundef [[TMP28]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT12]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST8]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT12]], [[TMP22]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT12]], [[TMP24]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done14: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[LHS_BEGIN]], i64 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY15:%.*]] = icmp eq %struct.S.0* [[LHS_BEGIN]], [[TMP27]] +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[LHS_BEGIN]], i64 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY15:%.*]] = icmp eq %struct.S.0* [[LHS_BEGIN]], [[TMP29]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY15]], label [[OMP_ARRAYCPY_DONE24:%.*]], label [[OMP_ARRAYCPY_BODY16:%.*]] // CHECK1: omp.arraycpy.body16: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST17:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT22:%.*]], [[OMP_ARRAYCPY_BODY16]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST18:%.*]] = phi %struct.S.0* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT21:%.*]], [[OMP_ARRAYCPY_BODY16]] ] -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[TMP28:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST18]] to i8* -// CHECK1-NEXT: [[ADD_PTR19:%.*]] = getelementptr inbounds i8, i8* [[TMP28]], i64 4 -// CHECK1-NEXT: [[TMP29:%.*]] = bitcast i8* [[ADD_PTR19]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP30:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST17]] to i8* -// CHECK1-NEXT: [[ADD_PTR20:%.*]] = getelementptr inbounds i8, i8* [[TMP30]], i64 4 -// CHECK1-NEXT: [[TMP31:%.*]] = bitcast i8* [[ADD_PTR20]] to %struct.BaseS1* -// CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* noundef [[TMP29]], %struct.BaseS1* noundef [[TMP31]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP30:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST18]] to i8* +// CHECK1-NEXT: [[ADD_PTR19:%.*]] = getelementptr inbounds i8, i8* [[TMP30]], i64 4 +// CHECK1-NEXT: [[TMP31:%.*]] = bitcast i8* [[ADD_PTR19]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST17]] to i8* +// CHECK1-NEXT: [[ADD_PTR20:%.*]] = getelementptr inbounds i8, i8* [[TMP32]], i64 4 +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast i8* [[ADD_PTR20]] to %struct.BaseS1* +// CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* noundef [[TMP31]], %struct.BaseS1* noundef [[TMP33]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT21]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST18]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT22]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST17]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE23:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT21]], [[TMP27]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE23:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT21]], [[TMP29]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE23]], label [[OMP_ARRAYCPY_DONE24]], label [[OMP_ARRAYCPY_BODY16]] // CHECK1: omp.arraycpy.done24: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: [[ARRAY_BEGIN25:%.*]] = getelementptr inbounds [4 x %struct.S.0], [4 x %struct.S.0]* [[DOTVAR3__ADDR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN25]], i64 4 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN25]], i64 4 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP32]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP34]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN25]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE26:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done26: -// CHECK1-NEXT: [[TMP33:%.*]] = bitcast [4 x %struct.S.0]* [[DOTVAR3__ADDR]] to i8* -// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP4]], i8* [[TMP33]], i8* inttoptr (i64 6 to i8*)) -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP35:%.*]] = bitcast [4 x %struct.S.0]* [[DOTVAR3__ADDR]] to i8* +// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP6]], i8* [[TMP35]], i8* inttoptr (i64 6 to i8*)) +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP6]]) // CHECK1-NEXT: ret void // // @@ -2613,6 +2665,9 @@ // CHECK1-NEXT: [[VAR:%.*]] = alloca %struct.S*, align 8 // CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S]], align 4 // CHECK1-NEXT: [[ARR:%.*]] = alloca [42 x %struct.S], align 16 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(12) [[TEST]]) // CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* @@ -2633,37 +2688,67 @@ // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S*, %struct.S** [[VAR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.S*, %struct.S*, i32*, [2 x i32]*, [2 x %struct.S]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32* [[T_VAR]], %struct.S* [[TMP1]], %struct.S* [[VAR1]], i32* [[T_VAR1]], [2 x i32]* [[VEC]], [2 x %struct.S]* [[S_ARR]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S]*, %struct.S*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* [[T_VAR]], [2 x i32]* [[VEC]], [2 x %struct.S]* [[S_ARR]], %struct.S* [[TMP2]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 // CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [42 x %struct.S]*, [2 x i32]*, i32*, [2 x %struct.S]*, %struct.S*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), [42 x %struct.S]* [[ARR]], [2 x i32]* [[VEC]], i32* [[T_VAR]], [2 x %struct.S]* [[S_ARR]], %struct.S* [[TMP3]]) +// CHECK1-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.S* [[VAR1]], %struct.S** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i32* [[T_VAR1]], i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP7]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 1 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[VAR]], align 8 +// CHECK1-NEXT: store %struct.S* [[TMP12]], %struct.S** [[TMP11]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_1]]) +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 0 +// CHECK1-NEXT: store [42 x %struct.S]* [[ARR]], [42 x %struct.S]** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 1 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 3 +// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load %struct.S*, %struct.S** [[VAR]], align 8 +// CHECK1-NEXT: store %struct.S* [[TMP18]], %struct.S** [[TMP17]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_2]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN1:%.*]] = getelementptr inbounds [42 x %struct.S], [42 x %struct.S]* [[ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN1]], i64 42 +// CHECK1-NEXT: [[ARRAY_BEGIN3:%.*]] = getelementptr inbounds [42 x %struct.S], [42 x %struct.S]* [[ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN3]], i64 42 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP4]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP19]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN1]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done2: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN3]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done4: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(12) [[VAR1]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN3:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN3]], i64 2 -// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY4:%.*]] -// CHECK1: arraydestroy.body4: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST5:%.*]] = phi %struct.S* [ [[TMP5]], [[ARRAYDESTROY_DONE2]] ], [ [[ARRAYDESTROY_ELEMENT6:%.*]], [[ARRAYDESTROY_BODY4]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT6]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST5]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT6]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE7:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT6]], [[ARRAY_BEGIN3]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE7]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY4]] -// CHECK1: arraydestroy.done8: +// CHECK1-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN5]], i64 2 +// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY6:%.*]] +// CHECK1: arraydestroy.body6: +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST7:%.*]] = phi %struct.S* [ [[TMP20]], [[ARRAYDESTROY_DONE4]] ], [ [[ARRAYDESTROY_ELEMENT8:%.*]], [[ARRAYDESTROY_BODY6]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT8]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST7]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT8]]) #[[ATTR4]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE9:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT8]], [[ARRAY_BEGIN5]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE9]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY6]] +// CHECK1: arraydestroy.done10: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(12) [[TEST]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP6]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP21]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev @@ -2728,16 +2813,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(12) [[VAR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(12) [[VAR1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR1:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(24) [[S_ARR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK1-NEXT: [[VAR1_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK1-NEXT: [[T_VAR1_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2746,151 +2826,153 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca %struct.S*, align 8 -// CHECK1-NEXT: [[VAR17:%.*]] = alloca [[STRUCT_S]], align 4 -// CHECK1-NEXT: [[T_VAR18:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca %struct.S*, align 8 +// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S* [[VAR1]], %struct.S** [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR1]], i32** [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S* [[TMP1]], %struct.S** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK1-NEXT: store %struct.S* [[TMP6]], %struct.S** [[_TMP1]], align 8 +// CHECK1-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load %struct.S*, %struct.S** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.S*, %struct.S** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP11]], align 8 +// CHECK1-NEXT: store %struct.S* [[TMP4]], %struct.S** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.S* [[TMP13]], %struct.S** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @.omp_initializer..25(i32* noundef [[T_VAR3]], i32* noundef [[TMP0]]) -// CHECK1-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast %struct.S* [[VAR4]] to i8* -// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP8]], i64 4 -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct.S* [[TMP7]] to i8* -// CHECK1-NEXT: [[ADD_PTR5:%.*]] = getelementptr inbounds i8, i8* [[TMP10]], i64 4 -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast i8* [[ADD_PTR5]] to %struct.BaseS1* -// CHECK1-NEXT: call void @.omp_initializer..5(%struct.BaseS1* noundef [[TMP9]], %struct.BaseS1* noundef [[TMP11]]) -// CHECK1-NEXT: store %struct.S* [[VAR4]], %struct.S** [[_TMP6]], align 8 -// CHECK1-NEXT: call void @.omp_initializer..27(%struct.S* noundef [[VAR17]], %struct.S* noundef [[TMP2]]) -// CHECK1-NEXT: call void @.omp_initializer..29(i32* noundef [[T_VAR18]], i32* noundef [[TMP3]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 +// CHECK1-NEXT: call void @.omp_initializer..25(i32* noundef [[T_VAR]], i32* noundef [[TMP2]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP15]], i64 4 +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast %struct.S* [[TMP14]] to i8* +// CHECK1-NEXT: [[ADD_PTR3:%.*]] = getelementptr inbounds i8, i8* [[TMP17]], i64 4 +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast i8* [[ADD_PTR3]] to %struct.BaseS1* +// CHECK1-NEXT: call void @.omp_initializer..5(%struct.BaseS1* noundef [[TMP16]], %struct.BaseS1* noundef [[TMP18]]) +// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP4]], align 8 +// CHECK1-NEXT: call void @.omp_initializer..27(%struct.S* noundef [[VAR1]], %struct.S* noundef [[TMP6]]) +// CHECK1-NEXT: call void @.omp_initializer..29(i32* noundef [[T_VAR1]], i32* noundef [[TMP8]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP4]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP5]], i64 0, i64 [[IDXPROM10]] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(12) %struct.S* @_ZN1SIiEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(12) [[ARRAYIDX11]], %struct.S* noundef nonnull align 4 dereferenceable(12) [[TMP22]]) +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP10]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP27]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load %struct.S*, %struct.S** [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP12]], i64 0, i64 [[IDXPROM6]] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(12) %struct.S* @_ZN1SIiEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(12) [[ARRAYIDX7]], %struct.S* noundef nonnull align 4 dereferenceable(12) [[TMP29]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]]) -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i32* [[T_VAR3]] to i8* -// CHECK1-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP28:%.*]] = bitcast %struct.S* [[VAR4]] to i8* -// CHECK1-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP30:%.*]] = bitcast %struct.S* [[VAR17]] to i8* -// CHECK1-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP32:%.*]] = bitcast i32* [[T_VAR18]] to i8* -// CHECK1-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = bitcast [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP34:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], i32 4, i64 32, i8* [[TMP33]], void (i8*, i8*)* @.omp.reduction.reduction_func.30, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP34]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]]) +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast i32* [[T_VAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP33]], i8** [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP35]], i8** [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP37:%.*]] = bitcast %struct.S* [[VAR1]] to i8* +// CHECK1-NEXT: store i8* [[TMP37]], i8** [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 +// CHECK1-NEXT: [[TMP39:%.*]] = bitcast i32* [[T_VAR1]] to i8* +// CHECK1-NEXT: store i8* [[TMP39]], i8** [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = bitcast [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP41:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP20]], i32 4, i64 32, i8* [[TMP40]], void (i8*, i8*)* @.omp.reduction.reduction_func.30, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP41]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: call void @.omp_combiner..24(i32* noundef [[TMP0]], i32* noundef [[T_VAR3]]) -// CHECK1-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[TMP7]] to i8* -// CHECK1-NEXT: [[ADD_PTR13:%.*]] = getelementptr inbounds i8, i8* [[TMP35]], i64 4 -// CHECK1-NEXT: [[TMP36:%.*]] = bitcast i8* [[ADD_PTR13]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP37:%.*]] = bitcast %struct.S* [[VAR4]] to i8* -// CHECK1-NEXT: [[ADD_PTR14:%.*]] = getelementptr inbounds i8, i8* [[TMP37]], i64 4 -// CHECK1-NEXT: [[TMP38:%.*]] = bitcast i8* [[ADD_PTR14]] to %struct.BaseS1* -// CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* noundef [[TMP36]], %struct.BaseS1* noundef [[TMP38]]) -// CHECK1-NEXT: call void @.omp_combiner..26(%struct.S* noundef [[TMP2]], %struct.S* noundef [[VAR17]]) -// CHECK1-NEXT: call void @.omp_combiner..28(i32* noundef [[TMP3]], i32* noundef [[T_VAR18]]) -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..24(i32* noundef [[TMP2]], i32* noundef [[T_VAR]]) +// CHECK1-NEXT: [[TMP42:%.*]] = bitcast %struct.S* [[TMP14]] to i8* +// CHECK1-NEXT: [[ADD_PTR9:%.*]] = getelementptr inbounds i8, i8* [[TMP42]], i64 4 +// CHECK1-NEXT: [[TMP43:%.*]] = bitcast i8* [[ADD_PTR9]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP44:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK1-NEXT: [[ADD_PTR10:%.*]] = getelementptr inbounds i8, i8* [[TMP44]], i64 4 +// CHECK1-NEXT: [[TMP45:%.*]] = bitcast i8* [[ADD_PTR10]] to %struct.BaseS1* +// CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* noundef [[TMP43]], %struct.BaseS1* noundef [[TMP45]]) +// CHECK1-NEXT: call void @.omp_combiner..26(%struct.S* noundef [[TMP6]], %struct.S* noundef [[VAR1]]) +// CHECK1-NEXT: call void @.omp_combiner..28(i32* noundef [[TMP8]], i32* noundef [[T_VAR1]]) +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP20]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP13]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @.omp_combiner..24(i32* noundef [[TMP0]], i32* noundef [[T_VAR3]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP13]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP13]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[TMP39:%.*]] = bitcast %struct.S* [[TMP7]] to i8* -// CHECK1-NEXT: [[ADD_PTR15:%.*]] = getelementptr inbounds i8, i8* [[TMP39]], i64 4 -// CHECK1-NEXT: [[TMP40:%.*]] = bitcast i8* [[ADD_PTR15]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP41:%.*]] = bitcast %struct.S* [[VAR4]] to i8* -// CHECK1-NEXT: [[ADD_PTR16:%.*]] = getelementptr inbounds i8, i8* [[TMP41]], i64 4 -// CHECK1-NEXT: [[TMP42:%.*]] = bitcast i8* [[ADD_PTR16]] to %struct.BaseS1* -// CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* noundef [[TMP40]], %struct.BaseS1* noundef [[TMP42]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP13]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP13]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @.omp_combiner..26(%struct.S* noundef [[TMP2]], %struct.S* noundef [[VAR17]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP13]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP13]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @.omp_combiner..28(i32* noundef [[TMP3]], i32* noundef [[T_VAR18]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP13]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..24(i32* noundef [[TMP2]], i32* noundef [[T_VAR]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP46:%.*]] = bitcast %struct.S* [[TMP14]] to i8* +// CHECK1-NEXT: [[ADD_PTR11:%.*]] = getelementptr inbounds i8, i8* [[TMP46]], i64 4 +// CHECK1-NEXT: [[TMP47:%.*]] = bitcast i8* [[ADD_PTR11]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP48:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK1-NEXT: [[ADD_PTR12:%.*]] = getelementptr inbounds i8, i8* [[TMP48]], i64 4 +// CHECK1-NEXT: [[TMP49:%.*]] = bitcast i8* [[ADD_PTR12]] to %struct.BaseS1* +// CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* noundef [[TMP47]], %struct.BaseS1* noundef [[TMP49]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..26(%struct.S* noundef [[TMP6]], %struct.S* noundef [[VAR1]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..28(i32* noundef [[TMP8]], i32* noundef [[T_VAR1]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(12) [[VAR17]]) #[[ATTR4]] -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(12) [[VAR4]]) #[[ATTR4]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(12) [[VAR1]]) #[[ATTR4]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(12) [[VAR]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -3037,14 +3119,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(24) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(12) [[VAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -3053,98 +3132,100 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK1-NEXT: store %struct.S* [[TMP4]], %struct.S** [[_TMP1]], align 8 +// CHECK1-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK1-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.S* [[TMP9]], %struct.S** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* @.init.32, align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* @.init.32, align 4 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP1]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP2]], i64 0, i64 [[IDXPROM5]] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(12) %struct.S* @_ZN1SIiEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(12) [[ARRAYIDX6]], %struct.S* noundef nonnull align 4 dereferenceable(12) [[TMP16]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP4]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP6]], i64 0, i64 [[IDXPROM4]] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(12) %struct.S* @_ZN1SIiEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(12) [[ARRAYIDX5]], %struct.S* noundef nonnull align 4 dereferenceable(12) [[TMP21]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]]) -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i32* [[T_VAR3]] to i8* -// CHECK1-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func.33, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP25:%.*]] = bitcast i32* [[T_VAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1, i64 8, i8* [[TMP26]], void (i8*, i8*)* @.omp.reduction.reduction_func.33, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: call void @.omp_combiner..34(i32* noundef [[TMP0]], i32* noundef [[T_VAR3]]) -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..34(i32* noundef [[TMP2]], i32* noundef [[T_VAR]]) +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @.omp_combiner..34(i32* noundef [[TMP0]], i32* noundef [[T_VAR3]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..34(i32* noundef [[TMP2]], i32* noundef [[T_VAR]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP7]]) +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP12]]) // CHECK1-NEXT: ret void // // @@ -3185,15 +3266,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [42 x %struct.S]* noundef nonnull align 4 dereferenceable(504) [[ARR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(24) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(12) [[VAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARR_ADDR:%.*]] = alloca [42 x %struct.S]*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -3202,182 +3279,184 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARR4:%.*]] = alloca [40 x %struct.S], align 16 +// CHECK1-NEXT: [[ARR:%.*]] = alloca [40 x %struct.S], align 16 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [42 x %struct.S]* [[ARR]], [42 x %struct.S]** [[ARR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [42 x %struct.S]*, [42 x %struct.S]** [[ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S* [[TMP4]], %struct.S** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK1-NEXT: store %struct.S* [[TMP5]], %struct.S** [[_TMP1]], align 8 +// CHECK1-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [42 x %struct.S]*, [42 x %struct.S]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.S*, %struct.S** [[TMP9]], align 8 +// CHECK1-NEXT: store %struct.S* [[TMP10]], %struct.S** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.S* [[TMP11]], %struct.S** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [42 x %struct.S], [42 x %struct.S]* [[TMP0]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [42 x %struct.S], [42 x %struct.S]* [[TMP0]], i64 0, i64 40 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [40 x %struct.S], [40 x %struct.S]* [[ARR4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP6]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [42 x %struct.S], [42 x %struct.S]* [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [42 x %struct.S], [42 x %struct.S]* [[TMP2]], i64 0, i64 40 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [40 x %struct.S], [40 x %struct.S]* [[ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP12]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAYIDX]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT6:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 4 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK1-NEXT: [[ADD_PTR5:%.*]] = getelementptr inbounds i8, i8* [[TMP9]], i64 4 -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8* [[ADD_PTR5]] to %struct.BaseS1* -// CHECK1-NEXT: call void @.omp_initializer..37(%struct.BaseS1* noundef [[TMP8]], %struct.BaseS1* noundef [[TMP10]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT5:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP13]], i64 4 +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK1-NEXT: [[ADD_PTR4:%.*]] = getelementptr inbounds i8, i8* [[TMP15]], i64 4 +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast i8* [[ADD_PTR4]] to %struct.BaseS1* +// CHECK1-NEXT: call void @.omp_initializer..37(%struct.BaseS1* noundef [[TMP14]], %struct.BaseS1* noundef [[TMP16]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT6]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT6]], [[TMP6]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT5]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP12]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [42 x %struct.S]* [[TMP0]] to %struct.S* -// CHECK1-NEXT: [[TMP12:%.*]] = ptrtoint %struct.S* [[TMP11]] to i64 -// CHECK1-NEXT: [[TMP13:%.*]] = ptrtoint %struct.S* [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP14:%.*]] = sub i64 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: [[TMP15:%.*]] = sdiv exact i64 [[TMP14]], ptrtoint (%struct.S* getelementptr ([[STRUCT_S]], %struct.S* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast [40 x %struct.S]* [[ARR4]] to %struct.S* -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[TMP16]], i64 [[TMP15]] -// CHECK1-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[TMP17]] to [42 x %struct.S]* -// CHECK1-NEXT: [[RHS_BEGIN:%.*]] = bitcast [40 x %struct.S]* [[ARR4]] to %struct.S* -// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast [42 x %struct.S]* [[TMP2]] to %struct.S* +// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint %struct.S* [[TMP17]] to i64 +// CHECK1-NEXT: [[TMP19:%.*]] = ptrtoint %struct.S* [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP20:%.*]] = sub i64 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[TMP21:%.*]] = sdiv exact i64 [[TMP20]], ptrtoint (%struct.S* getelementptr ([[STRUCT_S]], %struct.S* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP22:%.*]] = bitcast [40 x %struct.S]* [[ARR]] to %struct.S* +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[TMP22]], i64 [[TMP21]] +// CHECK1-NEXT: [[TMP24:%.*]] = bitcast %struct.S* [[TMP23]] to [42 x %struct.S]* +// CHECK1-NEXT: [[RHS_BEGIN:%.*]] = bitcast [40 x %struct.S]* [[ARR]] to %struct.S* +// CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP27]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP28]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP30]], [[TMP31]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP1]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP27]], i32* [[ARRAYIDX8]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP3]], i64 0, i64 [[IDXPROM9]] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(12) %struct.S* @_ZN1SIiEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(12) [[ARRAYIDX10]], %struct.S* noundef nonnull align 4 dereferenceable(12) [[TMP29]]) +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP4]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP33]], i32* [[ARRAYIDX7]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP8]], i64 0, i64 [[IDXPROM8]] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(12) %struct.S* @_ZN1SIiEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(12) [[ARRAYIDX9]], %struct.S* noundef nonnull align 4 dereferenceable(12) [[TMP35]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP37]], 1 +// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP33]]) -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[RHS_BEGIN]] to i8* -// CHECK1-NEXT: store i8* [[TMP35]], i8** [[TMP34]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP36]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP37]], i32 1, i64 8, i8* [[TMP38]], void (i8*, i8*)* @.omp.reduction.reduction_func.38, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP39]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP39]]) +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP41:%.*]] = bitcast %struct.S* [[RHS_BEGIN]] to i8* +// CHECK1-NEXT: store i8* [[TMP41]], i8** [[TMP40]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP45:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP43]], i32 1, i64 8, i8* [[TMP44]], void (i8*, i8*)* @.omp.reduction.reduction_func.38, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP45]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAYIDX]], [[TMP40]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAYIDX]], [[TMP46]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST12:%.*]] = phi %struct.S* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST13:%.*]] = phi %struct.S* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP41:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST13]] to i8* -// CHECK1-NEXT: [[ADD_PTR14:%.*]] = getelementptr inbounds i8, i8* [[TMP41]], i64 4 -// CHECK1-NEXT: [[TMP42:%.*]] = bitcast i8* [[ADD_PTR14]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP43:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST12]] to i8* -// CHECK1-NEXT: [[ADD_PTR15:%.*]] = getelementptr inbounds i8, i8* [[TMP43]], i64 4 -// CHECK1-NEXT: [[TMP44:%.*]] = bitcast i8* [[ADD_PTR15]] to %struct.BaseS1* -// CHECK1-NEXT: call void @.omp_combiner..36(%struct.BaseS1* noundef [[TMP42]], %struct.BaseS1* noundef [[TMP44]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST13]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST12]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP40]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done18: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP37]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST11:%.*]] = phi %struct.S* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST12:%.*]] = phi %struct.S* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP47:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST12]] to i8* +// CHECK1-NEXT: [[ADD_PTR13:%.*]] = getelementptr inbounds i8, i8* [[TMP47]], i64 4 +// CHECK1-NEXT: [[TMP48:%.*]] = bitcast i8* [[ADD_PTR13]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP49:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST11]] to i8* +// CHECK1-NEXT: [[ADD_PTR14:%.*]] = getelementptr inbounds i8, i8* [[TMP49]], i64 4 +// CHECK1-NEXT: [[TMP50:%.*]] = bitcast i8* [[ADD_PTR14]] to %struct.BaseS1* +// CHECK1-NEXT: call void @.omp_combiner..36(%struct.BaseS1* noundef [[TMP48]], %struct.BaseS1* noundef [[TMP50]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST12]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST11]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP46]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done17: +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP43]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq %struct.S* [[ARRAYIDX]], [[TMP45]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY19]], label [[OMP_ARRAYCPY_DONE28:%.*]], label [[OMP_ARRAYCPY_BODY20:%.*]] -// CHECK1: omp.arraycpy.body20: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST21:%.*]] = phi %struct.S* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT26:%.*]], [[OMP_ARRAYCPY_BODY20]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST22:%.*]] = phi %struct.S* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY20]] ] -// CHECK1-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[TMP48:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST22]] to i8* -// CHECK1-NEXT: [[ADD_PTR23:%.*]] = getelementptr inbounds i8, i8* [[TMP48]], i64 4 -// CHECK1-NEXT: [[TMP49:%.*]] = bitcast i8* [[ADD_PTR23]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP50:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST21]] to i8* -// CHECK1-NEXT: [[ADD_PTR24:%.*]] = getelementptr inbounds i8, i8* [[TMP50]], i64 4 -// CHECK1-NEXT: [[TMP51:%.*]] = bitcast i8* [[ADD_PTR24]] to %struct.BaseS1* -// CHECK1-NEXT: call void @.omp_combiner..36(%struct.BaseS1* noundef [[TMP49]], %struct.BaseS1* noundef [[TMP51]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT25]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT26]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE27:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT25]], [[TMP45]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_DONE28]], label [[OMP_ARRAYCPY_BODY20]] -// CHECK1: omp.arraycpy.done28: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP37]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY18:%.*]] = icmp eq %struct.S* [[ARRAYIDX]], [[TMP51]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY18]], label [[OMP_ARRAYCPY_DONE27:%.*]], label [[OMP_ARRAYCPY_BODY19:%.*]] +// CHECK1: omp.arraycpy.body19: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST20:%.*]] = phi %struct.S* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY19]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST21:%.*]] = phi %struct.S* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY19]] ] +// CHECK1-NEXT: [[TMP52:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, i32* [[TMP52]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP53]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP54:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST21]] to i8* +// CHECK1-NEXT: [[ADD_PTR22:%.*]] = getelementptr inbounds i8, i8* [[TMP54]], i64 4 +// CHECK1-NEXT: [[TMP55:%.*]] = bitcast i8* [[ADD_PTR22]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP56:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST20]] to i8* +// CHECK1-NEXT: [[ADD_PTR23:%.*]] = getelementptr inbounds i8, i8* [[TMP56]], i64 4 +// CHECK1-NEXT: [[TMP57:%.*]] = bitcast i8* [[ADD_PTR23]] to %struct.BaseS1* +// CHECK1-NEXT: call void @.omp_combiner..36(%struct.BaseS1* noundef [[TMP55]], %struct.BaseS1* noundef [[TMP57]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP53]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT24]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT25]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST20]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP51]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY19]] +// CHECK1: omp.arraycpy.done27: +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP43]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN29:%.*]] = getelementptr inbounds [40 x %struct.S], [40 x %struct.S]* [[ARR4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN29]], i64 40 +// CHECK1-NEXT: [[ARRAY_BEGIN28:%.*]] = getelementptr inbounds [40 x %struct.S], [40 x %struct.S]* [[ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN28]], i64 40 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP52]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP58]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN29]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE30:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done30: -// CHECK1-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP54]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN28]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE29:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done29: +// CHECK1-NEXT: [[TMP59:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load i32, i32* [[TMP59]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP60]]) // CHECK1-NEXT: ret void // // diff --git a/clang/test/OpenMP/for_reduction_task_codegen.cpp b/clang/test/OpenMP/for_reduction_task_codegen.cpp --- a/clang/test/OpenMP/for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/for_reduction_task_codegen.cpp @@ -41,295 +41,301 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[ARGC_ADDR]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i8*** [[ARGV_ADDR]], i8**** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i8*** noundef nonnull align 8 dereferenceable(8) [[ARGV:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca i8***, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[_TMP5:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x i8*], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP28:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP27:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i8*** [[ARGV]], i8**** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8***, i8**** [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8***, i8**** [[TMP3]], align 8 // CHECK1-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 9, i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[TMP3]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = sext i32 [[TMP4]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP5]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i8**, i8*** [[TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8*, i8** [[TMP6]], i64 9 -// CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint i8* [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint i8* [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = sub i64 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: [[TMP11:%.*]] = sdiv exact i64 [[TMP10]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP12:%.*]] = add nuw i64 [[TMP11]], 1 -// CHECK1-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP14:%.*]] = call i8* @llvm.stacksave() -// CHECK1-NEXT: store i8* [[TMP14]], i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP12]], align 16 -// CHECK1-NEXT: store i64 [[TMP12]], i64* [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP12]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[VLA]], [[TMP15]] +// CHECK1-NEXT: store i32 0, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8**, i8*** [[TMP4]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP5]], i64 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 0 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP8]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i8**, i8*** [[TMP4]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8*, i8** [[TMP9]], i64 9 +// CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP10]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint i8* [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = ptrtoint i8* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP13:%.*]] = sub i64 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = sdiv exact i64 [[TMP13]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP15:%.*]] = add nuw i64 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP17:%.*]] = call i8* @llvm.stacksave() +// CHECK1-NEXT: store i8* [[TMP17]], i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP15]], align 16 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP15]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[VLA]], [[TMP18]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP18]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint i8* [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = ptrtoint i8* [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP20:%.*]] = sub i64 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: [[TMP21:%.*]] = sdiv exact i64 [[TMP20]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP21]] -// CHECK1-NEXT: store i8** [[_TMP6]], i8*** [[_TMP5]], align 8 -// CHECK1-NEXT: store i8* [[TMP22]], i8** [[_TMP6]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i8**, i8*** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i8*, i8** [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint i8* [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP22:%.*]] = ptrtoint i8* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP23:%.*]] = sub i64 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP24:%.*]] = sdiv exact i64 [[TMP23]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP24]] +// CHECK1-NEXT: store i8** [[_TMP5]], i8*** [[_TMP4]], align 8 +// CHECK1-NEXT: store i8* [[TMP25]], i8** [[_TMP5]], align 8 // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP24:%.*]] = bitcast i32* [[ARGC1]] to i8* -// CHECK1-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i32* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, i64* [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init. to i8*), i8** [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store i8* null, i8** [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb. to i8*), i8** [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to i8* -// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP32]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP34:%.*]] = load i8**, i8*** [[TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8*, i8** [[TMP34]], i64 0 -// CHECK1-NEXT: [[TMP35:%.*]] = load i8*, i8** [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, i8* [[TMP35]], i64 0 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = sext i32 [[TMP36]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP37]] -// CHECK1-NEXT: [[TMP38:%.*]] = load i8**, i8*** [[TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8*, i8** [[TMP38]], i64 9 -// CHECK1-NEXT: [[TMP39:%.*]] = load i8*, i8** [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, i8* [[TMP39]], i64 [[LB_ADD_LEN10]] -// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 1 -// CHECK1-NEXT: store i8* [[ARRAYIDX9]], i8** [[TMP40]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint i8* [[ARRAYIDX12]] to i64 -// CHECK1-NEXT: [[TMP42:%.*]] = ptrtoint i8* [[ARRAYIDX9]] to i64 -// CHECK1-NEXT: [[TMP43:%.*]] = sub i64 [[TMP41]], [[TMP42]] -// CHECK1-NEXT: [[TMP44:%.*]] = sdiv exact i64 [[TMP43]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP45:%.*]] = add nuw i64 [[TMP44]], 1 -// CHECK1-NEXT: [[TMP46:%.*]] = mul nuw i64 [[TMP45]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP46]], i64* [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 3 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init..1 to i8*), i8** [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 4 -// CHECK1-NEXT: store i8* null, i8** [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 5 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb..2 to i8*), i8** [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 6 -// CHECK1-NEXT: store i32 1, i32* [[TMP51]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP53:%.*]] = load i32, i32* [[TMP52]], align 4 -// CHECK1-NEXT: [[TMP54:%.*]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]] to i8* -// CHECK1-NEXT: [[TMP55:%.*]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @[[GLOB1]], i32 [[TMP53]], i32 1, i32 2, i8* [[TMP54]]) -// CHECK1-NEXT: store i8* [[TMP55]], i8** [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = load i32, i32* [[TMP56]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP57]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP58:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP58]], 9 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP27:%.*]] = bitcast i32* [[ARGC]] to i8* +// CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP2]] to i8* +// CHECK1-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, i64* [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init. to i8*), i8** [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store i8* null, i8** [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb. to i8*), i8** [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP35:%.*]] = bitcast i32* [[TMP34]] to i8* +// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP35]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP37:%.*]] = load i8**, i8*** [[TMP4]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8*, i8** [[TMP37]], i64 0 +// CHECK1-NEXT: [[TMP38:%.*]] = load i8*, i8** [[ARRAYIDX7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, i8* [[TMP38]], i64 0 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = sext i32 [[TMP39]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP40]] +// CHECK1-NEXT: [[TMP41:%.*]] = load i8**, i8*** [[TMP4]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i8*, i8** [[TMP41]], i64 9 +// CHECK1-NEXT: [[TMP42:%.*]] = load i8*, i8** [[ARRAYIDX10]], align 8 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, i8* [[TMP42]], i64 [[LB_ADD_LEN9]] +// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 +// CHECK1-NEXT: store i8* [[ARRAYIDX8]], i8** [[TMP43]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint i8* [[ARRAYIDX11]] to i64 +// CHECK1-NEXT: [[TMP45:%.*]] = ptrtoint i8* [[ARRAYIDX8]] to i64 +// CHECK1-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] +// CHECK1-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP48:%.*]] = add nuw i64 [[TMP47]], 1 +// CHECK1-NEXT: [[TMP49:%.*]] = mul nuw i64 [[TMP48]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP49]], i64* [[TMP50]], align 8 +// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init..1 to i8*), i8** [[TMP51]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 +// CHECK1-NEXT: store i8* null, i8** [[TMP52]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb..2 to i8*), i8** [[TMP53]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 +// CHECK1-NEXT: store i32 1, i32* [[TMP54]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4 +// CHECK1-NEXT: [[TMP57:%.*]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]] to i8* +// CHECK1-NEXT: [[TMP58:%.*]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @[[GLOB1]], i32 [[TMP56]], i32 1, i32 2, i8* [[TMP57]]) +// CHECK1-NEXT: store i8* [[TMP58]], i8** [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load i32, i32* [[TMP59]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP60]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP61:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP61]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP59:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP59]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP62]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP60]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP63]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP61:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP62:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 [[TMP61]], [[TMP62]] -// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP64:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP65:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP64]], [[TMP65]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP63:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP63]], 1 +// CHECK1-NEXT: [[TMP66:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP66]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL]] // CHECK1-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store i8** [[DOTTASK_RED_]], i8*** [[TMP64]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[ARGC1]], i32** [[TMP65]], align 8 -// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP67:%.*]] = load i8**, i8*** [[_TMP5]], align 8 -// CHECK1-NEXT: store i8** [[TMP67]], i8*** [[TMP66]], align 8 -// CHECK1-NEXT: [[TMP68:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP69:%.*]] = load i32, i32* [[TMP68]], align 4 -// CHECK1-NEXT: [[TMP70:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP69]], i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP71:%.*]] = bitcast i8* [[TMP70]] to %struct.kmp_task_t_with_privates* -// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP71]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP72]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP74:%.*]] = load i8*, i8** [[TMP73]], align 8 -// CHECK1-NEXT: [[TMP75:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP74]], i8* align 8 [[TMP75]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP71]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP76]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP78:%.*]] = load i8*, i8** [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: store i8* [[TMP78]], i8** [[TMP77]], align 8 -// CHECK1-NEXT: [[TMP79:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP80:%.*]] = load i32, i32* [[TMP79]], align 4 -// CHECK1-NEXT: [[TMP81:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP80]], i8* [[TMP70]]) +// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store i8** [[DOTTASK_RED_]], i8*** [[TMP67]], align 8 +// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[ARGC]], i32** [[TMP68]], align 8 +// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP70:%.*]] = load i8**, i8*** [[_TMP4]], align 8 +// CHECK1-NEXT: store i8** [[TMP70]], i8*** [[TMP69]], align 8 +// CHECK1-NEXT: [[TMP71:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP72:%.*]] = load i32, i32* [[TMP71]], align 4 +// CHECK1-NEXT: [[TMP73:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP72]], i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP74:%.*]] = bitcast i8* [[TMP73]] to %struct.kmp_task_t_with_privates* +// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP74]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP75]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP77:%.*]] = load i8*, i8** [[TMP76]], align 8 +// CHECK1-NEXT: [[TMP78:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP77]], i8* align 8 [[TMP78]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP74]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP79]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP81:%.*]] = load i8*, i8** [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: store i8* [[TMP81]], i8** [[TMP80]], align 8 +// CHECK1-NEXT: [[TMP82:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP83:%.*]] = load i32, i32* [[TMP82]], align 4 +// CHECK1-NEXT: [[TMP84:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP83]], i8* [[TMP73]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP82:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP82]], 1 -// CHECK1-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP85:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP85]], 1 +// CHECK1-NEXT: store i64 [[ADD13]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP83:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP84:%.*]] = load i32, i32* [[TMP83]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP84]]) -// CHECK1-NEXT: [[TMP85:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP86:%.*]] = load i32, i32* [[TMP85]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP86]], i32 1) -// CHECK1-NEXT: [[TMP87:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP88:%.*]] = bitcast i32* [[ARGC1]] to i8* -// CHECK1-NEXT: store i8* [[TMP88]], i8** [[TMP87]], align 8 -// CHECK1-NEXT: [[TMP89:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP89]], align 8 -// CHECK1-NEXT: [[TMP90:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP91:%.*]] = inttoptr i64 [[TMP12]] to i8* +// CHECK1-NEXT: [[TMP86:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP87:%.*]] = load i32, i32* [[TMP86]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP87]]) +// CHECK1-NEXT: [[TMP88:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP89:%.*]] = load i32, i32* [[TMP88]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP89]], i32 1) +// CHECK1-NEXT: [[TMP90:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP91:%.*]] = bitcast i32* [[ARGC]] to i8* // CHECK1-NEXT: store i8* [[TMP91]], i8** [[TMP90]], align 8 -// CHECK1-NEXT: [[TMP92:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP93:%.*]] = load i32, i32* [[TMP92]], align 4 -// CHECK1-NEXT: [[TMP94:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP95:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP93]], i32 2, i64 24, i8* [[TMP94]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP95]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP92:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP92]], align 8 +// CHECK1-NEXT: [[TMP93:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP94:%.*]] = inttoptr i64 [[TMP15]] to i8* +// CHECK1-NEXT: store i8* [[TMP94]], i8** [[TMP93]], align 8 +// CHECK1-NEXT: [[TMP95:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP96:%.*]] = load i32, i32* [[TMP95]], align 4 +// CHECK1-NEXT: [[TMP97:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP98:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP96]], i32 2, i64 24, i8* [[TMP97]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP98]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP96:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP97:%.*]] = load i32, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP96]], [[TMP97]] -// CHECK1-NEXT: store i32 [[ADD15]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP98:%.*]] = getelementptr i8, i8* [[ARRAYIDX2]], i64 [[TMP12]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[ARRAYIDX2]], [[TMP98]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE22:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP99:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP100:%.*]] = load i32, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP99]], [[TMP100]] +// CHECK1-NEXT: store i32 [[ADD14]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP101:%.*]] = getelementptr i8, i8* [[ARRAYIDX1]], i64 [[TMP15]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[ARRAYIDX1]], [[TMP101]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi i8* [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP99:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP99]] to i32 -// CHECK1-NEXT: [[TMP100:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP100]] to i32 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV]], [[CONV17]] -// CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK1-NEXT: store i8 [[CONV19]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi i8* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP102:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP102]] to i32 +// CHECK1-NEXT: [[TMP103:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV16:%.*]] = sext i8 [[TMP103]] to i32 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV]], [[CONV16]] +// CHECK1-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK1-NEXT: store i8 [[CONV18]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP98]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done22: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP93]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP101]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done21: +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP96]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP101:%.*]] = load i32, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP102:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP101]] monotonic, align 4 -// CHECK1-NEXT: [[TMP103:%.*]] = getelementptr i8, i8* [[ARRAYIDX2]], i64 [[TMP12]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY23:%.*]] = icmp eq i8* [[ARRAYIDX2]], [[TMP103]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY23]], label [[OMP_ARRAYCPY_DONE36:%.*]], label [[OMP_ARRAYCPY_BODY24:%.*]] -// CHECK1: omp.arraycpy.body24: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST25:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT34:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST26:%.*]] = phi i8* [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT33:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP104:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV27:%.*]] = sext i8 [[TMP104]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST26]] monotonic, align 1 +// CHECK1-NEXT: [[TMP104:%.*]] = load i32, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP105:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP104]] monotonic, align 4 +// CHECK1-NEXT: [[TMP106:%.*]] = getelementptr i8, i8* [[ARRAYIDX1]], i64 [[TMP15]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY22:%.*]] = icmp eq i8* [[ARRAYIDX1]], [[TMP106]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY22]], label [[OMP_ARRAYCPY_DONE35:%.*]], label [[OMP_ARRAYCPY_BODY23:%.*]] +// CHECK1: omp.arraycpy.body23: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST24:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT33:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST25:%.*]] = phi i8* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], [[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[TMP107:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 +// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP107]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST25]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP105:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY24]] ], [ [[TMP110:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP105]], i8* [[_TMP28]], align 1 -// CHECK1-NEXT: [[TMP106:%.*]] = load i8, i8* [[_TMP28]], align 1 -// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP106]] to i32 -// CHECK1-NEXT: [[TMP107:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV30:%.*]] = sext i8 [[TMP107]] to i32 -// CHECK1-NEXT: [[ADD31:%.*]] = add nsw i32 [[CONV29]], [[CONV30]] -// CHECK1-NEXT: [[CONV32:%.*]] = trunc i32 [[ADD31]] to i8 -// CHECK1-NEXT: store i8 [[CONV32]], i8* [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP108:%.*]] = load i8, i8* [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP109:%.*]] = cmpxchg i8* [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i8 [[TMP105]], i8 [[TMP108]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP110]] = extractvalue { i8, i1 } [[TMP109]], 0 -// CHECK1-NEXT: [[TMP111:%.*]] = extractvalue { i8, i1 } [[TMP109]], 1 -// CHECK1-NEXT: br i1 [[TMP111]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP108:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY23]] ], [ [[TMP113:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP108]], i8* [[_TMP27]], align 1 +// CHECK1-NEXT: [[TMP109:%.*]] = load i8, i8* [[_TMP27]], align 1 +// CHECK1-NEXT: [[CONV28:%.*]] = sext i8 [[TMP109]] to i32 +// CHECK1-NEXT: [[TMP110:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 +// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP110]] to i32 +// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV28]], [[CONV29]] +// CHECK1-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 +// CHECK1-NEXT: store i8 [[CONV31]], i8* [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP111:%.*]] = load i8, i8* [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP112:%.*]] = cmpxchg i8* [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i8 [[TMP108]], i8 [[TMP111]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP113]] = extractvalue { i8, i1 } [[TMP112]], 0 +// CHECK1-NEXT: [[TMP114:%.*]] = extractvalue { i8, i1 } [[TMP112]], 1 +// CHECK1-NEXT: br i1 [[TMP114]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT33]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT34]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST25]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE35:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT33]], [[TMP103]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_DONE36]], label [[OMP_ARRAYCPY_BODY24]] -// CHECK1: omp.arraycpy.done36: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP93]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT33]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST24]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE34:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP106]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_BODY23]] +// CHECK1: omp.arraycpy.done35: +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP96]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP112:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP112]]) -// CHECK1-NEXT: [[TMP113:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP114:%.*]] = load i32, i32* [[TMP113]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP114]]) +// CHECK1-NEXT: [[TMP115:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP115]]) +// CHECK1-NEXT: [[TMP116:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP117:%.*]] = load i32, i32* [[TMP116]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP117]]) // CHECK1-NEXT: ret void // // @@ -439,7 +445,7 @@ // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[TMP_I:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[TMP4_I:%.*]] = alloca i8*, align 8 @@ -453,7 +459,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -466,29 +472,29 @@ // CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* // CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5:[0-9]+]] // CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* // CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP24:%.*]] = load i8**, i8*** [[TMP23]], align 8 // CHECK1-NEXT: [[TMP25:%.*]] = load i8*, i8** [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP26]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 // CHECK1-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP29]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP31:%.*]] = load i8**, i8*** [[TMP30]], align 8 // CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds i8*, i8** [[TMP31]], i64 9 // CHECK1-NEXT: [[TMP32:%.*]] = load i8*, i8** [[ARRAYIDX2_I]], align 8 @@ -499,10 +505,10 @@ // CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 // CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8 +// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8, !noalias !12 // CHECK1-NEXT: [[TMP39:%.*]] = load i8*, i8** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP40:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP39]], i8* [[TMP25]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP42:%.*]] = load i8**, i8*** [[TMP41]], align 8 // CHECK1-NEXT: [[TMP43:%.*]] = load i8*, i8** [[TMP42]], align 8 // CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint i8* [[TMP43]] to i64 diff --git a/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp b/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp --- a/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp +++ b/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp @@ -55,6 +55,7 @@ // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_3:%.*]] = alloca [2 x %struct.kmp_taskred_input_t.0], align 8 // CHECK1-NEXT: [[DOTTASK_RED_6:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 @@ -174,24 +175,34 @@ // CHECK1-NEXT: [[TMP57:%.*]] = bitcast [2 x %struct.kmp_taskred_input_t.0]* [[DOTRD_INPUT_3]] to i8* // CHECK1-NEXT: [[TMP58:%.*]] = call i8* @__kmpc_taskred_init(i32 [[TMP0]], i32 2, i8* [[TMP57]]) // CHECK1-NEXT: store i8* [[TMP58]], i8** [[DOTTASK_RED_6]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i16*, i8**, i8**)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[A]], i64 [[TMP2]], i16* [[VLA]], i8** [[DOTTASK_RED_]], i8** [[DOTTASK_RED_6]]) +// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[A]], i32** [[TMP59]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[TMP60]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i16* [[VLA]], i16** [[TMP61]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i8** [[DOTTASK_RED_]], i8*** [[TMP62]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store i8** [[DOTTASK_RED_6]], i8*** [[TMP63]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: [[TMP59:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP59]]) +// CHECK1-NEXT: [[TMP64:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP64]]) // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [5 x %struct.S], [5 x %struct.S]* [[C]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i64 5 +// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i64 5 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP60]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP65]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SD1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done8: -// CHECK1-NEXT: [[TMP61:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP61]] +// CHECK1-NEXT: [[TMP66:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP66]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SC1Ev @@ -468,73 +479,71 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i64 [[VLA:%.*]], i16* nonnull align 2 dereferenceable(2) [[D:%.*]], i8** nonnull align 8 dereferenceable(8) [[DOTTASK_RED_:%.*]], i8** nonnull align 8 dereferenceable(8) [[DOTTASK_RED_1:%.*]]) #[[ATTR8:[0-9]+]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR8:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i16*, align 8 -// CHECK1-NEXT: [[DOTTASK_RED__ADDR:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: [[DOTTASK_RED__ADDR2:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i16* [[D]], i16** [[D_ADDR]], align 8 -// CHECK1-NEXT: store i8** [[DOTTASK_RED_]], i8*** [[DOTTASK_RED__ADDR]], align 8 -// CHECK1-NEXT: store i8** [[DOTTASK_RED_1]], i8*** [[DOTTASK_RED__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16*, i16** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8**, i8*** [[DOTTASK_RED__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[DOTTASK_RED__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 -// CHECK1-NEXT: br i1 [[TMP8]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16*, i16** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8**, i8*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i8**, i8*** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store i32* [[TMP0]], i32** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: store i16* [[TMP2]], i16** [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK1-NEXT: store i8** [[TMP3]], i8*** [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 4 -// CHECK1-NEXT: store i8** [[TMP4]], i8*** [[TMP13]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: [[TMP14:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 1, i64 96, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to %struct.kmp_task_t_with_privates* -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP15]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP16]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP18:%.*]] = load i8*, i8** [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP18]], i8* align 8 [[TMP19]], i64 40, i1 false) -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP15]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP20]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP3]], align 8 -// CHECK1-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP20]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP24:%.*]] = load i8*, i8** [[TMP4]], align 8 -// CHECK1-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP16]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, i64* [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP16]], i32 0, i32 6 -// CHECK1-NEXT: store i64 4, i64* [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP16]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, i64* [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP16]], i32 0, i32 9 -// CHECK1-NEXT: [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i8* -// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP29]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP30:%.*]] = load i64, i64* [[TMP27]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i8* [[TMP14]], i32 1, i64* [[TMP25]], i64* [[TMP26]], i64 [[TMP30]], i32 1, i32 0, i64 0, i8* null) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[TMP2]], i32** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: store i16* [[TMP6]], i16** [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 3 +// CHECK1-NEXT: store i8** [[TMP8]], i8*** [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 4 +// CHECK1-NEXT: store i8** [[TMP10]], i8*** [[TMP19]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP20:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 1, i64 96, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP21:%.*]] = bitcast i8* [[TMP20]] to %struct.kmp_task_t_with_privates* +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP21]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP22]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = load i8*, i8** [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = bitcast %struct.anon.1* [[AGG_CAPTURED]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP24]], i8* align 8 [[TMP25]], i64 40, i1 false) +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP21]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP26]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP28:%.*]] = load i8*, i8** [[TMP8]], align 8 +// CHECK1-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP26]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load i8*, i8** [[TMP10]], align 8 +// CHECK1-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP22]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, i64* [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP22]], i32 0, i32 6 +// CHECK1-NEXT: store i64 4, i64* [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP22]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, i64* [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP22]], i32 0, i32 9 +// CHECK1-NEXT: [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i8* +// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP35]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP36:%.*]] = load i64, i64* [[TMP33]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i8* [[TMP20]], i32 1, i64* [[TMP31]], i64* [[TMP32]], i64 [[TMP36]], i32 1, i32 0, i64 0, i8* null) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -572,7 +581,7 @@ // CHECK1-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[I_I:%.*]] = alloca i32, align 4 @@ -587,7 +596,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.1* // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -616,9 +625,9 @@ // CHECK1-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // CHECK1-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 1 +// CHECK1-NEXT: store %struct.anon.1* [[TMP8]], %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP22]], i32 0, i32 1 // CHECK1-NEXT: [[TMP24:%.*]] = load i64, i64* [[TMP23]], align 8 // CHECK1-NEXT: [[TMP25:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP26:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 @@ -626,14 +635,14 @@ // CHECK1-NEXT: call void [[TMP27]](i8* [[TMP26]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]]) #[[ATTR3]] // CHECK1-NEXT: [[TMP28:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP29:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP22]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[TMP30]], align 8 // CHECK1-NEXT: [[TMP32:%.*]] = load i8*, i8** [[TMP28]], align 8 // CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 // CHECK1-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP31]] to i8* // CHECK1-NEXT: [[TMP35:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP33]], i8* [[TMP32]], i8* [[TMP34]]) #[[ATTR3]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP35]] to i32* -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP22]], i32 0, i32 2 // CHECK1-NEXT: [[TMP37:%.*]] = load i16*, i16** [[TMP36]], align 8 // CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP24]], 2 // CHECK1-NEXT: [[TMP39:%.*]] = udiv exact i64 [[TMP38]], ptrtoint (i16* getelementptr (i16, i16* null, i32 1) to i64) diff --git a/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp b/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp --- a/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp +++ b/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp @@ -55,6 +55,7 @@ // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_3:%.*]] = alloca [2 x %struct.kmp_taskred_input_t.0], align 8 // CHECK1-NEXT: [[DOTTASK_RED_6:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 @@ -174,24 +175,34 @@ // CHECK1-NEXT: [[TMP57:%.*]] = bitcast [2 x %struct.kmp_taskred_input_t.0]* [[DOTRD_INPUT_3]] to i8* // CHECK1-NEXT: [[TMP58:%.*]] = call i8* @__kmpc_taskred_init(i32 [[TMP0]], i32 2, i8* [[TMP57]]) // CHECK1-NEXT: store i8* [[TMP58]], i8** [[DOTTASK_RED_6]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i16*, i8**, i8**)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[A]], i64 [[TMP2]], i16* [[VLA]], i8** [[DOTTASK_RED_]], i8** [[DOTTASK_RED_6]]) +// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[A]], i32** [[TMP59]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[TMP60]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i16* [[VLA]], i16** [[TMP61]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i8** [[DOTTASK_RED_]], i8*** [[TMP62]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store i8** [[DOTTASK_RED_6]], i8*** [[TMP63]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: [[TMP59:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP59]]) +// CHECK1-NEXT: [[TMP64:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP64]]) // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [5 x %struct.S], [5 x %struct.S]* [[C]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i64 5 +// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i64 5 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP60]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP65]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SD1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done8: -// CHECK1-NEXT: [[TMP61:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP61]] +// CHECK1-NEXT: [[TMP66:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP66]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SC1Ev @@ -468,73 +479,71 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i64 [[VLA:%.*]], i16* nonnull align 2 dereferenceable(2) [[D:%.*]], i8** nonnull align 8 dereferenceable(8) [[DOTTASK_RED_:%.*]], i8** nonnull align 8 dereferenceable(8) [[DOTTASK_RED_1:%.*]]) #[[ATTR8:[0-9]+]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR8:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i16*, align 8 -// CHECK1-NEXT: [[DOTTASK_RED__ADDR:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: [[DOTTASK_RED__ADDR2:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i16* [[D]], i16** [[D_ADDR]], align 8 -// CHECK1-NEXT: store i8** [[DOTTASK_RED_]], i8*** [[DOTTASK_RED__ADDR]], align 8 -// CHECK1-NEXT: store i8** [[DOTTASK_RED_1]], i8*** [[DOTTASK_RED__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16*, i16** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8**, i8*** [[DOTTASK_RED__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[DOTTASK_RED__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 -// CHECK1-NEXT: br i1 [[TMP8]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16*, i16** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8**, i8*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i8**, i8*** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store i32* [[TMP0]], i32** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: store i16* [[TMP2]], i16** [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK1-NEXT: store i8** [[TMP3]], i8*** [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 4 -// CHECK1-NEXT: store i8** [[TMP4]], i8*** [[TMP13]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: [[TMP14:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 1, i64 96, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to %struct.kmp_task_t_with_privates* -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP15]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP16]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP18:%.*]] = load i8*, i8** [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP18]], i8* align 8 [[TMP19]], i64 40, i1 false) -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP15]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP20]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP3]], align 8 -// CHECK1-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP20]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP24:%.*]] = load i8*, i8** [[TMP4]], align 8 -// CHECK1-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP16]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, i64* [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP16]], i32 0, i32 6 -// CHECK1-NEXT: store i64 4, i64* [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP16]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, i64* [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP16]], i32 0, i32 9 -// CHECK1-NEXT: [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i8* -// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP29]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP30:%.*]] = load i64, i64* [[TMP27]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i8* [[TMP14]], i32 1, i64* [[TMP25]], i64* [[TMP26]], i64 [[TMP30]], i32 1, i32 0, i64 0, i8* null) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[TMP2]], i32** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: store i16* [[TMP6]], i16** [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 3 +// CHECK1-NEXT: store i8** [[TMP8]], i8*** [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 4 +// CHECK1-NEXT: store i8** [[TMP10]], i8*** [[TMP19]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP20:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 1, i64 96, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP21:%.*]] = bitcast i8* [[TMP20]] to %struct.kmp_task_t_with_privates* +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP21]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP22]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = load i8*, i8** [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = bitcast %struct.anon.1* [[AGG_CAPTURED]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP24]], i8* align 8 [[TMP25]], i64 40, i1 false) +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP21]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP26]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP28:%.*]] = load i8*, i8** [[TMP8]], align 8 +// CHECK1-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP26]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load i8*, i8** [[TMP10]], align 8 +// CHECK1-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP22]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, i64* [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP22]], i32 0, i32 6 +// CHECK1-NEXT: store i64 4, i64* [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP22]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, i64* [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP22]], i32 0, i32 9 +// CHECK1-NEXT: [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i8* +// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP35]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP36:%.*]] = load i64, i64* [[TMP33]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i8* [[TMP20]], i32 1, i64* [[TMP31]], i64* [[TMP32]], i64 [[TMP36]], i32 1, i32 0, i64 0, i8* null) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -572,7 +581,7 @@ // CHECK1-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[I_I:%.*]] = alloca i32, align 4 @@ -587,7 +596,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.1* // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -616,9 +625,9 @@ // CHECK1-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // CHECK1-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 1 +// CHECK1-NEXT: store %struct.anon.1* [[TMP8]], %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP22]], i32 0, i32 1 // CHECK1-NEXT: [[TMP24:%.*]] = load i64, i64* [[TMP23]], align 8 // CHECK1-NEXT: [[TMP25:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP26:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 @@ -626,14 +635,14 @@ // CHECK1-NEXT: call void [[TMP27]](i8* [[TMP26]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]]) #[[ATTR3]] // CHECK1-NEXT: [[TMP28:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP29:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP22]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[TMP30]], align 8 // CHECK1-NEXT: [[TMP32:%.*]] = load i8*, i8** [[TMP28]], align 8 // CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 // CHECK1-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP31]] to i8* // CHECK1-NEXT: [[TMP35:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP33]], i8* [[TMP32]], i8* [[TMP34]]) #[[ATTR3]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP35]] to i32* -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP22]], i32 0, i32 2 // CHECK1-NEXT: [[TMP37:%.*]] = load i16*, i16** [[TMP36]], align 8 // CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP24]], 2 // CHECK1-NEXT: [[TMP39:%.*]] = udiv exact i64 [[TMP38]], ptrtoint (i16* getelementptr (i16, i16* null, i32 1) to i64) diff --git a/clang/test/OpenMP/metadirective_device_kind_codegen.c b/clang/test/OpenMP/metadirective_device_kind_codegen.c --- a/clang/test/OpenMP/metadirective_device_kind_codegen.c +++ b/clang/test/OpenMP/metadirective_device_kind_codegen.c @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c -triple aarch64-unknown-linux -emit-llvm %s -o - | FileCheck %s // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c -triple ppc64le-unknown-linux -emit-llvm %s -o - | FileCheck %s @@ -7,6 +8,26 @@ void bar(void); +// CHECK-LABEL: @foo( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_3:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_4:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_5:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_6:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 4) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_2]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_3]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_4]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_6]]) +// CHECK-NEXT: ret void +// void foo(void) { #pragma omp metadirective when(device = {kind(any)} \ : parallel) @@ -35,46 +56,12 @@ ; } -// CHECK-LABEL: define {{.+}} void @foo() -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_1:@.+]] to void -// CHECK-NEXT: @__kmpc_push_num_threads -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_2:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_3:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_4:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_5:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_6:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_7:@.+]] to void -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_1]]( -// CHECK: call void @bar -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_2]]( -// CHECK: call void @__kmpc_for_static_init -// CHECK: call void @__kmpc_for_static_fini -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_3]]( -// CHECK: call void @__kmpc_for_static_init -// CHECK: call void @__kmpc_for_static_fini -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_4]]( -// CHECK: call void @bar -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_5]]( -// CHECK: call void @bar -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_6]]( -// CHECK: call void @bar -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_7]]( -// CHECK: call void @__kmpc_for_static_init -// CHECK: call void @__kmpc_for_static_fini -// CHECK: ret void #endif diff --git a/clang/test/OpenMP/metadirective_device_kind_codegen.cpp b/clang/test/OpenMP/metadirective_device_kind_codegen.cpp --- a/clang/test/OpenMP/metadirective_device_kind_codegen.cpp +++ b/clang/test/OpenMP/metadirective_device_kind_codegen.cpp @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - -fsanitize-address-use-after-scope | FileCheck %s // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple aarch64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - -fsanitize-address-use-after-scope | FileCheck %s // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple ppc64le-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - -fsanitize-address-use-after-scope | FileCheck %s @@ -8,6 +9,26 @@ void bar(); +// CHECK-LABEL: @_Z3foov( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_3:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_4:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_5:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_6:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 4) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_2]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_3]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_4]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_6]]) +// CHECK-NEXT: ret void +// void foo() { #pragma omp metadirective when(device = {kind(any)} \ : parallel) @@ -36,46 +57,12 @@ ; } -// CHECK-LABEL: define {{.+}} void @_Z3foov() -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_1:@.+]] to void -// CHECK-NEXT: @__kmpc_push_num_threads -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_2:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_3:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_4:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_5:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_6:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_7:@.+]] to void -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_1]]( -// CHECK: void @_Z3barv() -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_2]]( -// CHECK: call void @__kmpc_for_static_init -// CHECK: call void @__kmpc_for_static_fini -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_3]]( -// CHECK: call void @__kmpc_for_static_init -// CHECK: call void @__kmpc_for_static_fini -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_4]]( -// CHECK: void @_Z3barv() -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_5]]( -// CHECK: void @_Z3barv() -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_6]]( -// CHECK: void @_Z3barv() -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_7]]( -// CHECK: call void @__kmpc_for_static_init -// CHECK: call void @__kmpc_for_static_fini -// CHECK: ret void #endif diff --git a/clang/test/OpenMP/metadirective_implementation_codegen.cpp b/clang/test/OpenMP/metadirective_implementation_codegen.cpp --- a/clang/test/OpenMP/metadirective_implementation_codegen.cpp +++ b/clang/test/OpenMP/metadirective_implementation_codegen.cpp @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - -fsanitize-address-use-after-scope | FileCheck %s // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple aarch64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - -fsanitize-address-use-after-scope | FileCheck %s // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple ppc64le-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - -fsanitize-address-use-after-scope | FileCheck %s @@ -8,6 +9,22 @@ void bar(); +// CHECK-LABEL: @_Z3foov( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_3:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_4:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_5:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 1 +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_2]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_3]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_4]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]]) +// CHECK-NEXT: ret void +// void foo() { #pragma omp metadirective when(implementation = {vendor(score(0) \ : llvm)}, \ @@ -37,40 +54,15 @@ ; } -// CHECK-LABEL: void @_Z3foov() -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_2:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_3:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_4:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_5:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_6:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_7:@.+]] to void -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_2]]( -// CHECK: @_Z3barv -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_3]]( -// CHECK: @_Z3barv -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_4]]( -// CHECK: @_Z3barv -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_5]]( // NO-CHECK: call void @__kmpc_for_static_init // NO-CHECK: call void @__kmpc_for_static_fini -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_6]]( -// CHECK: call void @__kmpc_for_static_init -// CHECK: call void @__kmpc_for_static_fini -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_7]]( // NO-CHECK: call void @__kmpc_for_static_init // NO-CHECK: call void @__kmpc_for_static_fini -// CHECK: ret void #endif diff --git a/clang/test/OpenMP/nvptx_allocate_codegen.cpp b/clang/test/OpenMP/nvptx_allocate_codegen.cpp --- a/clang/test/OpenMP/nvptx_allocate_codegen.cpp +++ b/clang/test/OpenMP/nvptx_allocate_codegen.cpp @@ -91,7 +91,7 @@ // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: store i32 2, i32* @_ZZ4mainE1a, align 4 // CHECK1-NEXT: store double 3.000000e+00, double* [[B]], align 8 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooIiET_v() #[[ATTR7:[0-9]+]] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooIiET_v() #[[ATTR9:[0-9]+]] // CHECK1-NEXT: ret i32 [[CALL]] // // @@ -109,25 +109,35 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[BAR_A:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[BAR_B:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) -// CHECK1-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP1]], i64 0) +// CHECK1-NEXT: [[TMP1:%.*]] = bitcast %struct.anon* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK1-NEXT: [[TMP2:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 1) +// CHECK1-NEXT: [[TMP3:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP1]], i8* [[TMP2]]) +// CHECK1-NEXT: [[TMP4:%.*]] = load [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP3]] to %struct.anon* +// CHECK1-NEXT: store [[STRUCT_ANON]] [[TMP4]], %struct.anon* [[TMP5]], align 1 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8* [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP2]], i64 1) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[BAR_A:%.*]] = alloca float, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load float, float* [[BAR_A]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP0]] to double +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load float, float* [[BAR_A]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP1]] to double // CHECK1-NEXT: store double [[CONV]], double* addrspacecast (double addrspace(3)* @bar_b to double*), align 8 -// CHECK1-NEXT: call void @_Z3bazRf(float* noundef nonnull align 4 dereferenceable(4) [[BAR_A]]) #[[ATTR7]] +// CHECK1-NEXT: call void @_Z3bazRf(float* noundef nonnull align 4 dereferenceable(4) [[BAR_A]]) #[[ATTR9]] // CHECK1-NEXT: ret void // // @@ -137,11 +147,11 @@ // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]] +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(i8** [[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], %struct.anon* null) #[[ATTR6:[0-9]+]] // CHECK1-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_data_sharing.cpp b/clang/test/OpenMP/nvptx_data_sharing.cpp --- a/clang/test/OpenMP/nvptx_data_sharing.cpp +++ b/clang/test/OpenMP/nvptx_data_sharing.cpp @@ -33,9 +33,11 @@ // CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14 // CHECK-SAME: () #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [2 x i8*], align 8 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK-NEXT: [[DOTTMP_OUTLINED_AGG_ARG2:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -46,21 +48,30 @@ // CHECK-NEXT: [[B_ON_STACK:%.*]] = bitcast i8* [[B]] to i32* // CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: store i32 10, i32* [[A_ON_STACK]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[A_ON_STACK]] to i8* -// CHECK-NEXT: store i8* [[TMP3]], i8** [[TMP2]], align 8 -// CHECK-NEXT: [[TMP4:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP4]], i64 1) +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-NEXT: store i32* [[A_ON_STACK]], i32** [[TMP2]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = bitcast %struct.anon* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-NEXT: [[TMP4:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP3]], i8* [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP5]] to %struct.anon* +// CHECK-NEXT: store [[STRUCT_ANON]] [[TMP6]], %struct.anon* [[TMP7]], align 8 +// CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8* [[TMP5]]) +// CHECK-NEXT: call void @__kmpc_free_shared(i8* [[TMP4]], i64 8) // CHECK-NEXT: store i32 100, i32* [[B_ON_STACK]], align 4 // CHECK-NEXT: store i32 1000, i32* [[C]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS1]], i64 0, i64 0 -// CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[B_ON_STACK]] to i8* -// CHECK-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 8 -// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS1]], i64 0, i64 1 -// CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[A_ON_STACK]] to i8* -// CHECK-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 8 -// CHECK-NEXT: [[TMP9:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -// CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP9]], i64 2) +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0 +// CHECK-NEXT: store i32* [[B_ON_STACK]], i32** [[TMP8]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 1 +// CHECK-NEXT: store i32* [[A_ON_STACK]], i32** [[TMP9]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG2]] to i8* +// CHECK-NEXT: [[TMP11:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16) +// CHECK-NEXT: [[TMP12:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP10]], i8* [[TMP11]]) +// CHECK-NEXT: [[TMP13:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]], align 8 +// CHECK-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP12]] to %struct.anon.0* +// CHECK-NEXT: store [[STRUCT_ANON_0]] [[TMP13]], %struct.anon.0* [[TMP14]], align 8 +// CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8* [[TMP12]]) +// CHECK-NEXT: call void @__kmpc_free_shared(i8* [[TMP11]], i64 16) // CHECK-NEXT: call void @__kmpc_free_shared(i8* [[B]], i64 4) // CHECK-NEXT: call void @__kmpc_free_shared(i8* [[A]], i64 4) // CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -70,16 +81,18 @@ // // // CHECK-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK-NEXT: store i32 1000, i32* [[TMP0]], align 4 +// CHECK-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK-NEXT: store i32 1000, i32* [[TMP2]], align 4 // CHECK-NEXT: ret void // // @@ -89,38 +102,38 @@ // CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +// CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8*, align 8 // CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 -// CHECK-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** -// CHECK-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 -// CHECK-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR4:[0-9]+]] +// CHECK-NEXT: call void @__kmpc_get_shared_variables_aggregate(i8** [[GLOBAL_ARGS]]) +// CHECK-NEXT: [[TMP2:%.*]] = load i8*, i8** [[GLOBAL_ARGS]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.anon** +// CHECK-NEXT: [[TMP4:%.*]] = bitcast %struct.anon** [[TMP3]] to %struct.anon* +// CHECK-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], %struct.anon* [[TMP4]]) #[[ATTR4:[0-9]+]] // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 8 -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[C1:%.*]] = alloca i32*, align 8 // CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-NEXT: store i32* [[B]], i32** [[B_ADDR]], align 8 -// CHECK-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[B_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 +// CHECK-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 // CHECK-NEXT: store i32* [[C]], i32** [[C1]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 10000 -// CHECK-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 10000 +// CHECK-NEXT: store i32 [[ADD]], i32* [[TMP2]], align 4 // CHECK-NEXT: ret void // // @@ -130,18 +143,14 @@ // CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +// CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8*, align 8 // CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 -// CHECK-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** -// CHECK-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 1 -// CHECK-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32** -// CHECK-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 -// CHECK-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]]) #[[ATTR4]] +// CHECK-NEXT: call void @__kmpc_get_shared_variables_aggregate(i8** [[GLOBAL_ARGS]]) +// CHECK-NEXT: [[TMP2:%.*]] = load i8*, i8** [[GLOBAL_ARGS]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.anon.0** +// CHECK-NEXT: [[TMP4:%.*]] = bitcast %struct.anon.0** [[TMP3]] to %struct.anon.0* +// CHECK-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], %struct.anon.0* [[TMP4]]) #[[ATTR4]] // CHECK-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp --- a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp +++ b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp @@ -32,7 +32,7 @@ // CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 // CHECK4-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK4-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 @@ -50,13 +50,20 @@ // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: // CHECK4-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4:[0-9]+]]) -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK4-NEXT: [[CONV1:%.*]] = bitcast i64* [[ARGC_CASTED]] to i32* -// CHECK4-NEXT: store i32 [[TMP6]], i32* [[CONV1]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i64, i64* [[ARGC_CASTED]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP6]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: store [10 x i32]* [[TMP1]], [10 x i32]** [[TMP7]], align 8 +// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK4-NEXT: store i32* [[TMP2]], i32** [[TMP8]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK4-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK4-NEXT: store [10 x i32]* [[TMP3]], [10 x i32]** [[TMP11]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i64 [[TMP7]], [10 x i32]* [[TMP3]]) #[[ATTR5:[0-9]+]] +// CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR5:[0-9]+]] // CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK4-NEXT: ret void // CHECK4: worker.exit: @@ -64,183 +71,181 @@ // // // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[C:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[ARGC:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK4-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4 -// CHECK4-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x i8*], align 8 +// CHECK4-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK4-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK4-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK4-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 8 -// CHECK4-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK4-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 -// CHECK4-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* -// CHECK4-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 8 -// CHECK4-NEXT: [[C1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 40) -// CHECK4-NEXT: [[C_ON_STACK:%.*]] = bitcast i8* [[C1]] to [10 x i32]* -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK4-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP3]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK4-NEXT: store i32 [[TMP8]], i32* [[ARGC]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP9]], align 8 +// CHECK4-NEXT: [[C:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 40) +// CHECK4-NEXT: [[C_ON_STACK:%.*]] = bitcast i8* [[C]] to [10 x i32]* +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARGC]], align 4 +// CHECK4-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK4-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK4-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK4-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK4: omp.precond.then: // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = bitcast [10 x i32]* [[B4]] to i8* -// CHECK4-NEXT: [[TMP9:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* -// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false) +// CHECK4-NEXT: [[TMP15:%.*]] = bitcast [10 x i32]* [[B]] to i8* +// CHECK4-NEXT: [[TMP16:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* +// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP15]], i8* align 4 [[TMP16]], i64 40, i1 false) // CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK4-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK4-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -// CHECK4-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK4-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK4-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] +// CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK4-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] -// CHECK4-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK4-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP24]], [[ADD]] +// CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK4-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP20]] to i8* -// CHECK4-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 -// CHECK4-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK4-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP22]] to i8* -// CHECK4-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 -// CHECK4-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK4-NEXT: [[TMP28:%.*]] = bitcast i32* [[CONV]] to i8* -// CHECK4-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 -// CHECK4-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK4-NEXT: [[TMP30:%.*]] = bitcast i32* [[TMP2]] to i8* -// CHECK4-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8 -// CHECK4-NEXT: [[TMP31:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK4-NEXT: [[TMP32:%.*]] = bitcast [10 x i32]* [[B4]] to i8* -// CHECK4-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 8 -// CHECK4-NEXT: [[TMP33:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 5 -// CHECK4-NEXT: [[TMP34:%.*]] = bitcast [10 x i32]* [[C_ON_STACK]] to i8* -// CHECK4-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 8 -// CHECK4-NEXT: [[TMP35:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 6 -// CHECK4-NEXT: [[TMP36:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* -// CHECK4-NEXT: store i8* [[TMP36]], i8** [[TMP35]], align 8 -// CHECK4-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP37]], 0 -// CHECK4-NEXT: [[TMP38:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK4-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 -// CHECK4-NEXT: [[TMP41:%.*]] = bitcast [7 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK4-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP40]], i32 [[TMP38]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP41]], i64 7) +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP27:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK4-NEXT: store i64 [[TMP27]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP29:%.*]] = zext i32 [[TMP28]] to i64 +// CHECK4-NEXT: store i64 [[TMP29]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP30]], align 8 +// CHECK4-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP31]], align 8 +// CHECK4-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK4-NEXT: store i32* [[ARGC]], i32** [[TMP32]], align 8 +// CHECK4-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK4-NEXT: store i32* [[TMP6]], i32** [[TMP33]], align 8 +// CHECK4-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK4-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP34]], align 8 +// CHECK4-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK4-NEXT: store [10 x i32]* [[C_ON_STACK]], [10 x i32]** [[TMP35]], align 8 +// CHECK4-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK4-NEXT: store [10 x i32]* [[TMP10]], [10 x i32]** [[TMP36]], align 8 +// CHECK4-NEXT: [[TMP37:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK4-NEXT: [[TMP38:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 56) +// CHECK4-NEXT: [[TMP39:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP37]], i8* [[TMP38]]) +// CHECK4-NEXT: [[TMP40:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK4-NEXT: [[TMP41:%.*]] = bitcast i8* [[TMP39]] to %struct.anon.0* +// CHECK4-NEXT: store [[STRUCT_ANON_0]] [[TMP40]], %struct.anon.0* [[TMP41]], align 8 +// CHECK4-NEXT: [[TMP42:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP42]], 0 +// CHECK4-NEXT: [[TMP43:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK4-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 +// CHECK4-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP45]], i32 [[TMP43]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__1 to i8*), i8* null, i8* [[TMP39]]) +// CHECK4-NEXT: call void @__kmpc_free_shared(i8* [[TMP38]], i64 56) // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] -// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] -// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP46]], [[TMP47]] -// CHECK4-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP48]], [[TMP49]] -// CHECK4-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] -// CHECK4: cond.true12: -// CHECK4-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: br label [[COND_END14:%.*]] -// CHECK4: cond.false13: -// CHECK4-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: br label [[COND_END14]] -// CHECK4: cond.end14: -// CHECK4-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP50]], [[COND_TRUE12]] ], [ [[TMP51]], [[COND_FALSE13]] ] -// CHECK4-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP52]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP46]], [[TMP47]] +// CHECK4-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP48]], [[TMP49]] +// CHECK4-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP50]], [[TMP51]] +// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP52]], [[TMP53]] +// CHECK4-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK4: cond.true10: +// CHECK4-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: br label [[COND_END12:%.*]] +// CHECK4: cond.false11: +// CHECK4-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: br label [[COND_END12]] +// CHECK4: cond.end12: +// CHECK4-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP54]], [[COND_TRUE10]] ], [ [[TMP55]], [[COND_FALSE11]] ] +// CHECK4-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP56:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP56]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4 -// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP54]]) -// CHECK4-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP56:%.*]] = icmp ne i32 [[TMP55]], 0 -// CHECK4-NEXT: br i1 [[TMP56]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK4-NEXT: [[TMP57:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP58:%.*]] = load i32, i32* [[TMP57]], align 4 +// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP58]]) +// CHECK4-NEXT: [[TMP59:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0 +// CHECK4-NEXT: br i1 [[TMP60]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK4: .omp.lastprivate.then: -// CHECK4-NEXT: [[TMP57:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* -// CHECK4-NEXT: [[TMP58:%.*]] = bitcast [10 x i32]* [[C_ON_STACK]] to i8* -// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP57]], i8* align 4 [[TMP58]], i64 40, i1 false) +// CHECK4-NEXT: [[TMP61:%.*]] = bitcast [10 x i32]* [[TMP4]] to i8* +// CHECK4-NEXT: [[TMP62:%.*]] = bitcast [10 x i32]* [[C_ON_STACK]] to i8* +// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP61]], i8* align 4 [[TMP62]], i64 40, i1 false) // CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK4: .omp.lastprivate.done: // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: omp.precond.end: -// CHECK4-NEXT: call void @__kmpc_free_shared(i8* [[C1]], i64 40) +// CHECK4-NEXT: call void @__kmpc_free_shared(i8* [[C]], i64 40) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR1]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK4-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -250,107 +255,111 @@ // CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4 -// CHECK4-NEXT: [[C5:%.*]] = alloca [10 x i32], align 4 -// CHECK4-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK4-NEXT: [[C:%.*]] = alloca [10 x i32], align 4 +// CHECK4-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK4-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 -// CHECK4-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK4-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK4-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 8 -// CHECK4-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 8 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK4-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP9]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK4-NEXT: [[TMP12:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP11]], align 8 +// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK4-NEXT: [[TMP14:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP13]], align 8 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK4-NEXT: store i32 [[TMP15]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK4-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK4: omp.precond.then: // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK4-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 -// CHECK4-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK4-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK4-NEXT: [[TMP20:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK4-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP20]] to i32 // CHECK4-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[B4]] to i8* -// CHECK4-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* -// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 40, i1 false) -// CHECK4-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP14]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = bitcast [10 x i32]* [[B]] to i8* +// CHECK4-NEXT: [[TMP22:%.*]] = bitcast [10 x i32]* [[TMP10]] to i8* +// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i64 40, i1 false) +// CHECK4-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP24]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[CONV7:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK4-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: [[CMP8:%.*]] = icmp ule i64 [[CONV7]], [[TMP17]] -// CHECK4-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[CONV5:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK4-NEXT: [[TMP27:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK4-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP27]] +// CHECK4-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 -// CHECK4-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[I6]]) #[[ATTR8:[0-9]+]] -// CHECK4-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[TMP1]]) #[[ATTR8]] -// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[CALL]], [[CALL9]] -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK4-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B4]], i64 0, i64 [[IDXPROM]] -// CHECK4-NEXT: [[CALL11:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[ARRAYIDX]]) #[[ATTR8]] -// CHECK4-NEXT: [[ADD12:%.*]] = add nsw i32 [[ADD10]], [[CALL11]] -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK4-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK4-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C5]], i64 0, i64 [[IDXPROM13]] -// CHECK4-NEXT: [[CALL15:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[ARRAYIDX14]]) #[[ATTR8]] -// CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD12]], [[CALL15]] -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK4-NEXT: [[IDXPROM17:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK4-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i64 0, i64 [[IDXPROM17]] -// CHECK4-NEXT: [[CALL19:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[ARRAYIDX18]]) #[[ATTR8]] -// CHECK4-NEXT: [[ADD20:%.*]] = add nsw i32 [[ADD16]], [[CALL19]] -// CHECK4-NEXT: store i32 [[ADD20]], i32* [[TMP1]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 +// CHECK4-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[I4]]) #[[ATTR8:[0-9]+]] +// CHECK4-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[TMP8]]) #[[ATTR8]] +// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]] +// CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK4-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i64 0, i64 [[IDXPROM]] +// CHECK4-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[ARRAYIDX]]) #[[ATTR8]] +// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]] +// CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK4-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK4-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C]], i64 0, i64 [[IDXPROM11]] +// CHECK4-NEXT: [[CALL13:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[ARRAYIDX12]]) #[[ATTR8]] +// CHECK4-NEXT: [[ADD14:%.*]] = add nsw i32 [[ADD10]], [[CALL13]] +// CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK4-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK4-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP14]], i64 0, i64 [[IDXPROM15]] +// CHECK4-NEXT: [[CALL17:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[ARRAYIDX16]]) #[[ATTR8]] +// CHECK4-NEXT: [[ADD18:%.*]] = add nsw i32 [[ADD14]], [[CALL17]] +// CHECK4-NEXT: store i32 [[ADD18]], i32* [[TMP8]], align 4 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK4-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK4-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]]) -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK4-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK4-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP35]]) +// CHECK4-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK4-NEXT: br i1 [[TMP37]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK4: .omp.lastprivate.then: -// CHECK4-NEXT: [[TMP28:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* -// CHECK4-NEXT: [[TMP29:%.*]] = bitcast [10 x i32]* [[C5]] to i8* -// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i64 40, i1 false) +// CHECK4-NEXT: [[TMP38:%.*]] = bitcast [10 x i32]* [[TMP12]] to i8* +// CHECK4-NEXT: [[TMP39:%.*]] = bitcast [10 x i32]* [[C]] to i8* +// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i64 40, i1 false) // CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK4: .omp.lastprivate.done: // CHECK4-NEXT: br label [[OMP_PRECOND_END]] @@ -366,7 +375,7 @@ // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK5-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK5-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 @@ -383,12 +392,20 @@ // CHECK5-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK5: user_code.entry: // CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4:[0-9]+]]) -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[ARGC_CASTED]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARGC_CASTED]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP6]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store [10 x i32]* [[TMP1]], [10 x i32]** [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store i32* [[TMP2]], i32** [[TMP8]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: store [10 x i32]* [[TMP3]], [10 x i32]** [[TMP11]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK5-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i32 [[TMP7]], [10 x i32]* [[TMP3]]) #[[ATTR5:[0-9]+]] +// CHECK5-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR5:[0-9]+]] // CHECK5-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK5-NEXT: ret void // CHECK5: worker.exit: @@ -396,180 +413,179 @@ // // // CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[C:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[ARGC:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK5-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4 -// CHECK5-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x i8*], align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK5-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK5-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK5-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 -// CHECK5-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 -// CHECK5-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 -// CHECK5-NEXT: [[C1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 40) -// CHECK5-NEXT: [[C_ON_STACK:%.*]] = bitcast i8* [[C1]] to [10 x i32]* -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK5-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: store i32 [[TMP8]], i32* [[ARGC]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP9]], align 4 +// CHECK5-NEXT: [[C:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 40) +// CHECK5-NEXT: [[C_ON_STACK:%.*]] = bitcast i8* [[C]] to [10 x i32]* +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARGC]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK5-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK5-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK5-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK5-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK5-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK5: omp.precond.then: // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = bitcast [10 x i32]* [[B4]] to i8* -// CHECK5-NEXT: [[TMP9:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false) +// CHECK5-NEXT: [[TMP15:%.*]] = bitcast [10 x i32]* [[B]] to i8* +// CHECK5-NEXT: [[TMP16:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP15]], i8* align 4 [[TMP16]], i32 40, i1 false) // CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK5-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK5-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK5-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK5-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] +// CHECK5-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK5-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] -// CHECK5-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK5-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP24]], [[ADD]] +// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP22:%.*]] = inttoptr i32 [[TMP19]] to i8* -// CHECK5-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 4 -// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP20]] to i8* -// CHECK5-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 4 -// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK5-NEXT: [[TMP26:%.*]] = bitcast i32* [[ARGC_ADDR]] to i8* -// CHECK5-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 4 -// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK5-NEXT: [[TMP28:%.*]] = bitcast i32* [[TMP2]] to i8* -// CHECK5-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 4 -// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 -// CHECK5-NEXT: [[TMP30:%.*]] = bitcast [10 x i32]* [[B4]] to i8* -// CHECK5-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 4 -// CHECK5-NEXT: [[TMP31:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 5 -// CHECK5-NEXT: [[TMP32:%.*]] = bitcast [10 x i32]* [[C_ON_STACK]] to i8* -// CHECK5-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 4 -// CHECK5-NEXT: [[TMP33:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 6 -// CHECK5-NEXT: [[TMP34:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* -// CHECK5-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 4 -// CHECK5-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK5-NEXT: [[TMP36:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK5-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 -// CHECK5-NEXT: [[TMP39:%.*]] = bitcast [7 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK5-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP38]], i32 [[TMP36]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP39]], i32 7) +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP26]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: store i32 [[TMP27]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP28]], align 4 +// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP29]], align 4 +// CHECK5-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store i32* [[ARGC]], i32** [[TMP30]], align 4 +// CHECK5-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store i32* [[TMP6]], i32** [[TMP31]], align 4 +// CHECK5-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP32]], align 4 +// CHECK5-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK5-NEXT: store [10 x i32]* [[C_ON_STACK]], [10 x i32]** [[TMP33]], align 4 +// CHECK5-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK5-NEXT: store [10 x i32]* [[TMP10]], [10 x i32]** [[TMP34]], align 4 +// CHECK5-NEXT: [[TMP35:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK5-NEXT: [[TMP36:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 28) +// CHECK5-NEXT: [[TMP37:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP35]], i8* [[TMP36]]) +// CHECK5-NEXT: [[TMP38:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK5-NEXT: [[TMP39:%.*]] = bitcast i8* [[TMP37]] to %struct.anon.0* +// CHECK5-NEXT: store [[STRUCT_ANON_0]] [[TMP38]], %struct.anon.0* [[TMP39]], align 4 +// CHECK5-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK5-NEXT: [[TMP41:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK5-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 +// CHECK5-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP43]], i32 [[TMP41]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__1 to i8*), i8* null, i8* [[TMP37]]) +// CHECK5-NEXT: call void @__kmpc_free_shared(i8* [[TMP36]], i32 28) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] -// CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] -// CHECK5-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] -// CHECK5-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK5-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP46]], [[TMP47]] -// CHECK5-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] -// CHECK5: cond.true12: -// CHECK5-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK5-NEXT: br label [[COND_END14:%.*]] -// CHECK5: cond.false13: -// CHECK5-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: br label [[COND_END14]] -// CHECK5: cond.end14: -// CHECK5-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP48]], [[COND_TRUE12]] ], [ [[TMP49]], [[COND_FALSE13]] ] -// CHECK5-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP50]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] +// CHECK5-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP46]], [[TMP47]] +// CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP48]], [[TMP49]] +// CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP50]], [[TMP51]] +// CHECK5-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK5: cond.true10: +// CHECK5-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: br label [[COND_END12:%.*]] +// CHECK5: cond.false11: +// CHECK5-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: br label [[COND_END12]] +// CHECK5: cond.end12: +// CHECK5-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP52]], [[COND_TRUE10]] ], [ [[TMP53]], [[COND_FALSE11]] ] +// CHECK5-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP54]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP51:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP52:%.*]] = load i32, i32* [[TMP51]], align 4 -// CHECK5-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP52]]) -// CHECK5-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP54:%.*]] = icmp ne i32 [[TMP53]], 0 -// CHECK5-NEXT: br i1 [[TMP54]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP55:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4 +// CHECK5-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP56]]) +// CHECK5-NEXT: [[TMP57:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP58:%.*]] = icmp ne i32 [[TMP57]], 0 +// CHECK5-NEXT: br i1 [[TMP58]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP55:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* -// CHECK5-NEXT: [[TMP56:%.*]] = bitcast [10 x i32]* [[C_ON_STACK]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP55]], i8* align 4 [[TMP56]], i32 40, i1 false) +// CHECK5-NEXT: [[TMP59:%.*]] = bitcast [10 x i32]* [[TMP4]] to i8* +// CHECK5-NEXT: [[TMP60:%.*]] = bitcast [10 x i32]* [[C_ON_STACK]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP59]], i8* align 4 [[TMP60]], i32 40, i1 false) // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: // CHECK5-NEXT: br label [[OMP_PRECOND_END]] // CHECK5: omp.precond.end: -// CHECK5-NEXT: call void @__kmpc_free_shared(i8* [[C1]], i32 40) +// CHECK5-NEXT: call void @__kmpc_free_shared(i8* [[C]], i32 40) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -579,101 +595,105 @@ // CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[B3:%.*]] = alloca [10 x i32], align 4 -// CHECK5-NEXT: [[C4:%.*]] = alloca [10 x i32], align 4 -// CHECK5-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK5-NEXT: [[C:%.*]] = alloca [10 x i32], align 4 +// CHECK5-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 4 -// CHECK5-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK5-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK5-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 -// CHECK5-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK5-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP9]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP12:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP11]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK5-NEXT: [[TMP14:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP13]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK5-NEXT: store i32 [[TMP15]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK5-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK5-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK5-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK5: omp.precond.then: // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK5-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[B3]] to i8* -// CHECK5-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 40, i1 false) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP14]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = bitcast [10 x i32]* [[B]] to i8* +// CHECK5-NEXT: [[TMP22:%.*]] = bitcast [10 x i32]* [[TMP10]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 40, i1 false) +// CHECK5-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP24]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] -// CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK5-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP26]], [[TMP27]] +// CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK5-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[I5]]) #[[ATTR8:[0-9]+]] -// CHECK5-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[TMP1]]) #[[ATTR8]] -// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]] -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B3]], i32 0, i32 [[TMP19]] -// CHECK5-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[ARRAYIDX]]) #[[ATTR8]] -// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]] -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK5-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C4]], i32 0, i32 [[TMP20]] -// CHECK5-NEXT: [[CALL12:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[ARRAYIDX11]]) #[[ATTR8]] -// CHECK5-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD10]], [[CALL12]] -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK5-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i32 0, i32 [[TMP21]] -// CHECK5-NEXT: [[CALL15:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[ARRAYIDX14]]) #[[ATTR8]] -// CHECK5-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD13]], [[CALL15]] -// CHECK5-NEXT: store i32 [[ADD16]], i32* [[TMP1]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// CHECK5-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[I3]]) #[[ATTR8:[0-9]+]] +// CHECK5-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[TMP8]]) #[[ATTR8]] +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[CALL]], [[CALL5]] +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i32 0, i32 [[TMP29]] +// CHECK5-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[ARRAYIDX]]) #[[ATTR8]] +// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD6]], [[CALL7]] +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C]], i32 0, i32 [[TMP30]] +// CHECK5-NEXT: [[CALL10:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[ARRAYIDX9]]) #[[ATTR8]] +// CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD8]], [[CALL10]] +// CHECK5-NEXT: [[TMP31:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK5-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP14]], i32 0, i32 [[TMP31]] +// CHECK5-NEXT: [[CALL13:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[ARRAYIDX12]]) #[[ATTR8]] +// CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[ADD11]], [[CALL13]] +// CHECK5-NEXT: store i32 [[ADD14]], i32* [[TMP8]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK5-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK5-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK5-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]]) -// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK5-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK5-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP35]]) +// CHECK5-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK5-NEXT: br i1 [[TMP37]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP28:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* -// CHECK5-NEXT: [[TMP29:%.*]] = bitcast [10 x i32]* [[C4]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 40, i1 false) +// CHECK5-NEXT: [[TMP38:%.*]] = bitcast [10 x i32]* [[TMP12]] to i8* +// CHECK5-NEXT: [[TMP39:%.*]] = bitcast [10 x i32]* [[C]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i32 40, i1 false) // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: // CHECK5-NEXT: br label [[OMP_PRECOND_END]] diff --git a/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp --- a/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp @@ -28,14 +28,21 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l21 // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) -// CHECK1-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK1-NEXT: [[TMP2:%.*]] = bitcast %struct.anon* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK1-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 1) +// CHECK1-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK1-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon* +// CHECK1-NEXT: store [[STRUCT_ANON]] [[TMP5]], %struct.anon* [[TMP6]], align 1 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon*)* @__omp_outlined__ to i8*), i8* null, i8* [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i64 1) // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -43,34 +50,44 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @_Z3usev() #[[ATTR8:[0-9]+]] +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: call void @_Z3usev() #[[ATTR10:[0-9]+]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_Z3usev // CHECK1-SAME: () #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) -// CHECK1-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP1]], i64 0) +// CHECK1-NEXT: [[TMP1:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK1-NEXT: [[TMP2:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 1) +// CHECK1-NEXT: [[TMP3:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP1]], i8* [[TMP2]]) +// CHECK1-NEXT: [[TMP4:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP3]] to %struct.anon.0* +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP4]], %struct.anon.0* [[TMP5]], align 1 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8* [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP2]], i64 1) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23 -// CHECK1-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK1-SAME: () #[[ATTR7:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: call void @_Z3usev() #[[ATTR8]] +// CHECK1-NEXT: call void @_Z3usev() #[[ATTR10]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -78,42 +95,52 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @_Z4workv() #[[ATTR8]] +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: call void @_Z4workv() #[[ATTR10]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK1-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] { +// CHECK1-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR9:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3:[0-9]+]] +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(i8** [[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], %struct.anon.0* null) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l21 // CHECK2-SAME: () #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: entry: -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) -// CHECK2-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK2-NEXT: [[TMP2:%.*]] = bitcast %struct.anon* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK2-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK2-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK2-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon* +// CHECK2-NEXT: store [[STRUCT_ANON]] [[TMP5]], %struct.anon* [[TMP6]], align 1 +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon*)* @__omp_outlined__ to i8*), i8* null, i8* [[TMP4]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -121,34 +148,44 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: call void @_Z3usev() #[[ATTR8:[0-9]+]] +// CHECK2-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: call void @_Z3usev() #[[ATTR10:[0-9]+]] // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@_Z3usev // CHECK2-SAME: () #[[ATTR2:[0-9]+]] { // CHECK2-NEXT: entry: -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) -// CHECK2-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP1]], i32 0) +// CHECK2-NEXT: [[TMP1:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK2-NEXT: [[TMP2:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK2-NEXT: [[TMP3:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP1]], i8* [[TMP2]]) +// CHECK2-NEXT: [[TMP4:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP3]] to %struct.anon.0* +// CHECK2-NEXT: store [[STRUCT_ANON_0]] [[TMP4]], %struct.anon.0* [[TMP5]], align 1 +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8* [[TMP3]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[TMP2]], i32 1) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23 -// CHECK2-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK2-SAME: () #[[ATTR7:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: call void @_Z3usev() #[[ATTR8]] +// CHECK2-NEXT: call void @_Z3usev() #[[ATTR10]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -156,27 +193,30 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: call void @_Z4workv() #[[ATTR8]] +// CHECK2-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: call void @_Z4workv() #[[ATTR10]] // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK2-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] { +// CHECK2-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR9:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 +// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8*, align 4 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3:[0-9]+]] +// CHECK2-NEXT: call void @__kmpc_get_shared_variables_aggregate(i8** [[GLOBAL_ARGS]]) +// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], %struct.anon.0* null) #[[ATTR4:[0-9]+]] // CHECK2-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp --- a/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp @@ -37,7 +37,8 @@ // CHECK1-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK1-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) @@ -45,12 +46,17 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-NEXT: call void @_Z3usePi(i32* noundef [[TMP0]]) #[[ATTR7:[0-9]+]] -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP5]], i64 1) +// CHECK1-NEXT: call void @_Z3usePi(i32* noundef [[TMP0]]) #[[ATTR9:[0-9]+]] +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[TMP0]], i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast %struct.anon* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK1-NEXT: [[TMP5:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP4]], i8* [[TMP5]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to %struct.anon* +// CHECK1-NEXT: store [[STRUCT_ANON]] [[TMP7]], %struct.anon* [[TMP8]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8* [[TMP6]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP5]], i64 8) // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -61,28 +67,36 @@ // CHECK1-SAME: (i32* noundef [[C:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) // CHECK1-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i32** [[C_ADDR]] to i8* -// CHECK1-NEXT: store i8* [[TMP2]], i8** [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 2, i32 -1, i8* bitcast (void (i32*, i32*, i32**)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP3]], i64 1) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32** [[C_ADDR]], i32*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK1-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK1-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.0* +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP5]], %struct.anon.0* [[TMP6]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 2, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8* [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i64 8) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @_Z3usePi(i32* noundef [[TMP0]]) #[[ATTR7]] +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: call void @_Z3usePi(i32* noundef [[TMP2]]) #[[ATTR9]] // CHECK1-NEXT: ret void // // @@ -92,31 +106,32 @@ // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 -// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR4:[0-9]+]] +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(i8** [[GLOBAL_ARGS]]) +// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[GLOBAL_ARGS]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.anon** +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast %struct.anon** [[TMP3]] to %struct.anon* +// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], %struct.anon* [[TMP4]]) #[[ATTR5:[0-9]+]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32**, i32*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP0]], align 8 -// CHECK1-NEXT: call void @_Z4workPi(i32* noundef [[TMP1]]) #[[ATTR7]] +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8 +// CHECK1-NEXT: call void @_Z4workPi(i32* noundef [[TMP3]]) #[[ATTR9]] // CHECK1-NEXT: ret void // // @@ -130,7 +145,7 @@ // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to i8* // CHECK1-NEXT: [[TMP2:%.*]] = bitcast i32* [[ATOMIC_TEMP]] to i8* -// CHECK1-NEXT: call void @__atomic_load(i64 noundef 4, i8* noundef [[TMP1]], i8* noundef [[TMP2]], i32 noundef 0) #[[ATTR7]] +// CHECK1-NEXT: call void @__atomic_load(i64 noundef 4, i8* noundef [[TMP1]], i8* noundef [[TMP2]], i32 noundef 0) #[[ATTR9]] // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[ATOMIC_TEMP]], align 4 @@ -139,7 +154,7 @@ // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to i8* // CHECK1-NEXT: [[TMP5:%.*]] = bitcast i32* [[ATOMIC_TEMP]] to i8* // CHECK1-NEXT: [[TMP6:%.*]] = bitcast i32* [[ATOMIC_TEMP1]] to i8* -// CHECK1-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 4, i8* noundef [[TMP4]], i8* noundef [[TMP5]], i8* noundef [[TMP6]], i32 noundef 0, i32 noundef 0) #[[ATTR7]] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 4, i8* noundef [[TMP4]], i8* noundef [[TMP5]], i8* noundef [[TMP6]], i32 noundef 0, i32 noundef 0) #[[ATTR9]] // CHECK1-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: // CHECK1-NEXT: ret void @@ -151,16 +166,15 @@ // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32*** -// CHECK1-NEXT: [[TMP5:%.*]] = load i32**, i32*** [[TMP4]], align 8 -// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32** [[TMP5]]) #[[ATTR4]] +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(i8** [[GLOBAL_ARGS]]) +// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[GLOBAL_ARGS]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.anon.0** +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast %struct.anon.0** [[TMP3]] to %struct.anon.0* +// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], %struct.anon.0* [[TMP4]]) #[[ATTR5]] // CHECK1-NEXT: ret void // // @@ -168,7 +182,8 @@ // CHECK2-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK2-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) @@ -176,12 +191,17 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK2-NEXT: call void @_Z3usePi(i32* noundef [[TMP0]]) #[[ATTR7:[0-9]+]] -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP5]], i32 1) +// CHECK2-NEXT: call void @_Z3usePi(i32* noundef [[TMP0]]) #[[ATTR9:[0-9]+]] +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store i32* [[TMP0]], i32** [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast %struct.anon* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK2-NEXT: [[TMP5:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP4]], i8* [[TMP5]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to %struct.anon* +// CHECK2-NEXT: store [[STRUCT_ANON]] [[TMP7]], %struct.anon* [[TMP8]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8* [[TMP6]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[TMP5]], i32 4) // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -192,28 +212,36 @@ // CHECK2-SAME: (i32* noundef [[C:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) // CHECK2-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i32** [[C_ADDR]] to i8* -// CHECK2-NEXT: store i8* [[TMP2]], i8** [[TMP1]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 2, i32 -1, i8* bitcast (void (i32*, i32*, i32**)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP3]], i32 1) +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store i32** [[C_ADDR]], i32*** [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK2-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK2-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.0* +// CHECK2-NEXT: store [[STRUCT_ANON_0]] [[TMP5]], %struct.anon.0* [[TMP6]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 2, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8* [[TMP4]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 4) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 4 -// CHECK2-NEXT: call void @_Z3usePi(i32* noundef [[TMP0]]) #[[ATTR7]] +// CHECK2-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK2-NEXT: call void @_Z3usePi(i32* noundef [[TMP2]]) #[[ATTR9]] // CHECK2-NEXT: ret void // // @@ -223,31 +251,32 @@ // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 +// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8*, align 4 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** -// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR4:[0-9]+]] +// CHECK2-NEXT: call void @__kmpc_get_shared_variables_aggregate(i8** [[GLOBAL_ARGS]]) +// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[GLOBAL_ARGS]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.anon** +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast %struct.anon** [[TMP3]] to %struct.anon* +// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], %struct.anon* [[TMP4]]) #[[ATTR5:[0-9]+]] // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32** noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca i32**, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32**, i32*** [[C_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP0]], align 4 -// CHECK2-NEXT: call void @_Z4workPi(i32* noundef [[TMP1]]) #[[ATTR7]] +// CHECK2-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 4 +// CHECK2-NEXT: call void @_Z4workPi(i32* noundef [[TMP3]]) #[[ATTR9]] // CHECK2-NEXT: ret void // // @@ -261,7 +290,7 @@ // CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to i8* // CHECK2-NEXT: [[TMP2:%.*]] = bitcast i32* [[ATOMIC_TEMP]] to i8* -// CHECK2-NEXT: call void @__atomic_load(i32 noundef 4, i8* noundef [[TMP1]], i8* noundef [[TMP2]], i32 noundef 0) #[[ATTR7]] +// CHECK2-NEXT: call void @__atomic_load(i32 noundef 4, i8* noundef [[TMP1]], i8* noundef [[TMP2]], i32 noundef 0) #[[ATTR9]] // CHECK2-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK2: atomic_cont: // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[ATOMIC_TEMP]], align 4 @@ -270,7 +299,7 @@ // CHECK2-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to i8* // CHECK2-NEXT: [[TMP5:%.*]] = bitcast i32* [[ATOMIC_TEMP]] to i8* // CHECK2-NEXT: [[TMP6:%.*]] = bitcast i32* [[ATOMIC_TEMP1]] to i8* -// CHECK2-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i32 noundef 4, i8* noundef [[TMP4]], i8* noundef [[TMP5]], i8* noundef [[TMP6]], i32 noundef 0, i32 noundef 0) #[[ATTR7]] +// CHECK2-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i32 noundef 4, i8* noundef [[TMP4]], i8* noundef [[TMP5]], i8* noundef [[TMP6]], i32 noundef 0, i32 noundef 0) #[[ATTR9]] // CHECK2-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK2: atomic_exit: // CHECK2-NEXT: ret void @@ -282,15 +311,14 @@ // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 +// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8*, align 4 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32*** -// CHECK2-NEXT: [[TMP5:%.*]] = load i32**, i32*** [[TMP4]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32** [[TMP5]]) #[[ATTR4]] +// CHECK2-NEXT: call void @__kmpc_get_shared_variables_aggregate(i8** [[GLOBAL_ARGS]]) +// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[GLOBAL_ARGS]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.anon.0** +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast %struct.anon.0** [[TMP3]] to %struct.anon.0* +// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], %struct.anon.0* [[TMP4]]) #[[ATTR5]] // CHECK2-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_codegen.cpp --- a/clang/test/OpenMP/nvptx_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_codegen.cpp @@ -77,9 +77,12 @@ // CHECK1-SAME: (i64 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS2:%.*]] = alloca [0 x i8*], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG2:%.*]] = alloca [[STRUCT_ANON_0]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_3:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG4:%.*]] = alloca [[STRUCT_ANON_1]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) @@ -87,14 +90,32 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP2]], i64 0) -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 0, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP3]], i64 0) -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS2]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** [[TMP4]], i64 0) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK1-NEXT: [[TMP2:%.*]] = bitcast %struct.anon* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK1-NEXT: [[TMP3:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 1) +// CHECK1-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK1-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon* +// CHECK1-NEXT: store [[STRUCT_ANON]] [[TMP5]], %struct.anon* [[TMP6]], align 1 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8* [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i64 1) +// CHECK1-NEXT: [[TMP7:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG2]] to i8* +// CHECK1-NEXT: [[TMP8:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 1) +// CHECK1-NEXT: [[TMP9:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP7]], i8* [[TMP8]]) +// CHECK1-NEXT: [[TMP10:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]], align 1 +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP9]] to %struct.anon.0* +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP10]], %struct.anon.0* [[TMP11]], align 1 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 0, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8* [[TMP9]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP8]], i64 1) +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast %struct.anon.1* [[DOTTMP_OUTLINED_AGG_ARG4]] to i8* +// CHECK1-NEXT: [[TMP13:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 1) +// CHECK1-NEXT: [[TMP14:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP12]], i8* [[TMP13]]) +// CHECK1-NEXT: [[TMP15:%.*]] = load [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_3]], align 1 +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP14]] to %struct.anon.1* +// CHECK1-NEXT: store [[STRUCT_ANON_1]] [[TMP15]], %struct.anon.1* [[TMP16]], align 1 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.1*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8* [[TMP14]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP13]], i64 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void @@ -103,13 +124,16 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 42, i32* [[A]], align 4 // CHECK1-NEXT: ret void // @@ -120,23 +144,26 @@ // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3:[0-9]+]] +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(i8** [[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], %struct.anon* null) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 43, i32* [[A]], align 4 // CHECK1-NEXT: ret void // @@ -147,23 +174,26 @@ // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(i8** [[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], %struct.anon.0* null) #[[ATTR4]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 44, i32* [[A]], align 4 // CHECK1-NEXT: ret void // @@ -174,12 +204,12 @@ // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(i8** [[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], %struct.anon.1* null) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -190,7 +220,8 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_2]], align 8 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 @@ -204,22 +235,28 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1000 -// CHECK1-NEXT: [[TMP4:%.*]] = zext i1 [[CMP]] to i32 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 [[TMP4]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP5]], i64 0) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = bitcast %struct.anon.2* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK1-NEXT: [[TMP4:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 1) +// CHECK1-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP3]], i8* [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP5]] to %struct.anon.2* +// CHECK1-NEXT: store [[STRUCT_ANON_2]] [[TMP6]], %struct.anon.2* [[TMP7]], align 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1000 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i1 [[CMP]] to i32 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 [[TMP9]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.2*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8* [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP4]], i64 1) +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV2]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV2]], align 2 +// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP11]] to i32 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 // CHECK1-NEXT: store i16 [[CONV5]], i16* [[CONV2]], align 2 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void @@ -228,17 +265,20 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 45, i32* [[A]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // @@ -248,12 +288,12 @@ // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK1-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(i8** [[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], %struct.anon.2* null) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -261,7 +301,8 @@ // CHECK1-SAME: (i64 noundef [[A:%.*]]) #[[ATTR0]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_3]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) @@ -273,13 +314,18 @@ // CHECK1-NEXT: [[A_ON_STACK:%.*]] = bitcast i8* [[A1]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[A_ON_STACK]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[A_ON_STACK]] to i8* -// CHECK1-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__4 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__4_wrapper to i8*), i8** [[TMP5]], i64 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[A_ON_STACK]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[A_ON_STACK]], i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast %struct.anon.3* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK1-NEXT: [[TMP5:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP4]], i8* [[TMP5]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to %struct.anon.3* +// CHECK1-NEXT: store [[STRUCT_ANON_3]] [[TMP7]], %struct.anon.3* [[TMP8]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.3*)* @__omp_outlined__4 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__4_wrapper to i8*), i8* [[TMP6]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP5]], i64 8) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ON_STACK]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK1-NEXT: store i32 [[INC]], i32* [[A_ON_STACK]], align 4 // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[A1]], i64 4) // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -289,42 +335,44 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[CRITICAL_COUNTER:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i64 @__kmpc_warp_active_thread_mask() -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = call i64 @__kmpc_warp_active_thread_mask() +// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() // CHECK1-NEXT: store i32 0, i32* [[CRITICAL_COUNTER]], align 4 // CHECK1-NEXT: br label [[OMP_CRITICAL_LOOP:%.*]] // CHECK1: omp.critical.loop: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CRITICAL_COUNTER]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], [[NVPTX_NUM_THREADS]] -// CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_CRITICAL_TEST:%.*]], label [[OMP_CRITICAL_EXIT:%.*]] -// CHECK1: omp.critical.test: // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CRITICAL_COUNTER]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP2]], [[TMP5]] -// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_CRITICAL_BODY:%.*]], label [[OMP_CRITICAL_SYNC:%.*]] +// CHECK1-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP5]], [[NVPTX_NUM_THREADS]] +// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_CRITICAL_TEST:%.*]], label [[OMP_CRITICAL_EXIT:%.*]] +// CHECK1: omp.critical.test: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[CRITICAL_COUNTER]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP4]], [[TMP7]] +// CHECK1-NEXT: br i1 [[TMP8]], label [[OMP_CRITICAL_BODY:%.*]], label [[OMP_CRITICAL_SYNC:%.*]] // CHECK1: omp.critical.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], [8 x i32]* @"_gomp_critical_user_$var") -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], [8 x i32]* @"_gomp_critical_user_$var") +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], [8 x i32]* @"_gomp_critical_user_$var") +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], [8 x i32]* @"_gomp_critical_user_$var") // CHECK1-NEXT: br label [[OMP_CRITICAL_SYNC]] // CHECK1: omp.critical.sync: -// CHECK1-NEXT: call void @__kmpc_syncwarp(i64 [[TMP1]]) -// CHECK1-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK1-NEXT: store i32 [[TMP10]], i32* [[CRITICAL_COUNTER]], align 4 +// CHECK1-NEXT: call void @__kmpc_syncwarp(i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP12:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 [[TMP12]], i32* [[CRITICAL_COUNTER]], align 4 // CHECK1-NEXT: br label [[OMP_CRITICAL_LOOP]] // CHECK1: omp.critical.exit: // CHECK1-NEXT: ret void @@ -336,16 +384,15 @@ // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 -// CHECK1-NEXT: call void @__omp_outlined__4(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(i8** [[GLOBAL_ARGS]]) +// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[GLOBAL_ARGS]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.anon.3** +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast %struct.anon.3** [[TMP3]] to %struct.anon.3* +// CHECK1-NEXT: call void @__omp_outlined__4(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], %struct.anon.3* [[TMP4]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -353,23 +400,44 @@ // CHECK2-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS2:%.*]] = alloca [0 x i8*], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG2:%.*]] = alloca [[STRUCT_ANON_0]], align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_3:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG4:%.*]] = alloca [[STRUCT_ANON_1]], align 8 // CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK2-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP2]], i32 0) -// CHECK2-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 0, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP3]], i32 0) -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS2]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** [[TMP4]], i32 0) -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK2-NEXT: [[TMP2:%.*]] = bitcast %struct.anon* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK2-NEXT: [[TMP3:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i32 1) +// CHECK2-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK2-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon* +// CHECK2-NEXT: store [[STRUCT_ANON]] [[TMP5]], %struct.anon* [[TMP6]], align 1 +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8* [[TMP4]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) +// CHECK2-NEXT: [[TMP7:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG2]] to i8* +// CHECK2-NEXT: [[TMP8:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i32 1) +// CHECK2-NEXT: [[TMP9:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP7]], i8* [[TMP8]]) +// CHECK2-NEXT: [[TMP10:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]], align 1 +// CHECK2-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP9]] to %struct.anon.0* +// CHECK2-NEXT: store [[STRUCT_ANON_0]] [[TMP10]], %struct.anon.0* [[TMP11]], align 1 +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 0, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8* [[TMP9]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[TMP8]], i32 1) +// CHECK2-NEXT: [[TMP12:%.*]] = bitcast %struct.anon.1* [[DOTTMP_OUTLINED_AGG_ARG4]] to i8* +// CHECK2-NEXT: [[TMP13:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i32 1) +// CHECK2-NEXT: [[TMP14:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP12]], i8* [[TMP13]]) +// CHECK2-NEXT: [[TMP15:%.*]] = load [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_3]], align 1 +// CHECK2-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP14]] to %struct.anon.1* +// CHECK2-NEXT: store [[STRUCT_ANON_1]] [[TMP15]], %struct.anon.1* [[TMP16]], align 1 +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.1*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8* [[TMP14]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[TMP13]], i32 1) +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK2-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void @@ -378,13 +446,16 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK2-NEXT: store i32 42, i32* [[A]], align 4 // CHECK2-NEXT: ret void // @@ -395,23 +466,26 @@ // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 +// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8*, align 4 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2:[0-9]+]] +// CHECK2-NEXT: call void @__kmpc_get_shared_variables_aggregate(i8** [[GLOBAL_ARGS]]) +// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], %struct.anon* null) #[[ATTR3:[0-9]+]] // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 // CHECK2-NEXT: store i32 43, i32* [[A]], align 4 // CHECK2-NEXT: ret void // @@ -422,23 +496,26 @@ // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 +// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8*, align 4 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__kmpc_get_shared_variables_aggregate(i8** [[GLOBAL_ARGS]]) +// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], %struct.anon.0* null) #[[ATTR3]] // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 // CHECK2-NEXT: store i32 44, i32* [[A]], align 4 // CHECK2-NEXT: ret void // @@ -449,12 +526,12 @@ // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 +// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8*, align 4 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__kmpc_get_shared_variables_aggregate(i8** [[GLOBAL_ARGS]]) +// CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], %struct.anon.1* null) #[[ATTR3]] // CHECK2-NEXT: ret void // // @@ -465,7 +542,8 @@ // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_2]], align 8 // CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK2-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 @@ -477,22 +555,28 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1000 -// CHECK2-NEXT: [[TMP4:%.*]] = zext i1 [[CMP]] to i32 -// CHECK2-NEXT: [[TMP5:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 [[TMP4]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP5]], i32 0) -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK2-NEXT: [[TMP3:%.*]] = bitcast %struct.anon.2* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK2-NEXT: [[TMP4:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i32 1) +// CHECK2-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP3]], i8* [[TMP4]]) +// CHECK2-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK2-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP5]] to %struct.anon.2* +// CHECK2-NEXT: store [[STRUCT_ANON_2]] [[TMP6]], %struct.anon.2* [[TMP7]], align 1 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1000 +// CHECK2-NEXT: [[TMP9:%.*]] = zext i1 [[CMP]] to i32 +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 [[TMP9]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.2*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8* [[TMP5]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[TMP4]], i32 1) +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK2-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK2-NEXT: [[CONV1:%.*]] = sext i16 [[TMP7]] to i32 +// CHECK2-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK2-NEXT: [[CONV1:%.*]] = sext i16 [[TMP11]] to i32 // CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK2-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 // CHECK2-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK2-NEXT: store i32 [[ADD4]], i32* [[ARRAYIDX]], align 4 // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void @@ -501,17 +585,20 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 // CHECK2-NEXT: store i32 45, i32* [[A]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK2-NEXT: ret void // // @@ -521,12 +608,12 @@ // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 +// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8*, align 4 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK2-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__kmpc_get_shared_variables_aggregate(i8** [[GLOBAL_ARGS]]) +// CHECK2-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], %struct.anon.2* null) #[[ATTR3]] // CHECK2-NEXT: ret void // // @@ -534,7 +621,8 @@ // CHECK2-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_3]], align 8 // CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -545,13 +633,18 @@ // CHECK2-NEXT: [[A_ON_STACK:%.*]] = bitcast i8* [[A1]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[A_ON_STACK]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i32* [[A_ON_STACK]] to i8* -// CHECK2-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__4 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__4_wrapper to i8*), i8** [[TMP5]], i32 1) -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[A_ON_STACK]], align 4 -// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store i32* [[A_ON_STACK]], i32** [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast %struct.anon.3* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK2-NEXT: [[TMP5:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP4]], i8* [[TMP5]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to %struct.anon.3* +// CHECK2-NEXT: store [[STRUCT_ANON_3]] [[TMP7]], %struct.anon.3* [[TMP8]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.3*)* @__omp_outlined__4 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__4_wrapper to i8*), i8* [[TMP6]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[TMP5]], i32 4) +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ON_STACK]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK2-NEXT: store i32 [[INC]], i32* [[A_ON_STACK]], align 4 // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[A1]], i32 4) // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -561,42 +654,44 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK2-NEXT: [[CRITICAL_COUNTER:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i64 @__kmpc_warp_active_thread_mask() -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK2-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = call i64 @__kmpc_warp_active_thread_mask() +// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() // CHECK2-NEXT: store i32 0, i32* [[CRITICAL_COUNTER]], align 4 // CHECK2-NEXT: br label [[OMP_CRITICAL_LOOP:%.*]] // CHECK2: omp.critical.loop: -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CRITICAL_COUNTER]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], [[NVPTX_NUM_THREADS]] -// CHECK2-NEXT: br i1 [[TMP4]], label [[OMP_CRITICAL_TEST:%.*]], label [[OMP_CRITICAL_EXIT:%.*]] -// CHECK2: omp.critical.test: // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CRITICAL_COUNTER]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP2]], [[TMP5]] -// CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_CRITICAL_BODY:%.*]], label [[OMP_CRITICAL_SYNC:%.*]] +// CHECK2-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP5]], [[NVPTX_NUM_THREADS]] +// CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_CRITICAL_TEST:%.*]], label [[OMP_CRITICAL_EXIT:%.*]] +// CHECK2: omp.critical.test: +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[CRITICAL_COUNTER]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP4]], [[TMP7]] +// CHECK2-NEXT: br i1 [[TMP8]], label [[OMP_CRITICAL_BODY:%.*]], label [[OMP_CRITICAL_SYNC:%.*]] // CHECK2: omp.critical.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK2-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], [8 x i32]* @"_gomp_critical_user_$var") -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK2-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 -// CHECK2-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], [8 x i32]* @"_gomp_critical_user_$var") +// CHECK2-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], [8 x i32]* @"_gomp_critical_user_$var") +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK2-NEXT: store i32 [[INC]], i32* [[TMP2]], align 4 +// CHECK2-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], [8 x i32]* @"_gomp_critical_user_$var") // CHECK2-NEXT: br label [[OMP_CRITICAL_SYNC]] // CHECK2: omp.critical.sync: -// CHECK2-NEXT: call void @__kmpc_syncwarp(i64 [[TMP1]]) -// CHECK2-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK2-NEXT: store i32 [[TMP10]], i32* [[CRITICAL_COUNTER]], align 4 +// CHECK2-NEXT: call void @__kmpc_syncwarp(i64 [[TMP3]]) +// CHECK2-NEXT: [[TMP12:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: store i32 [[TMP12]], i32* [[CRITICAL_COUNTER]], align 4 // CHECK2-NEXT: br label [[OMP_CRITICAL_LOOP]] // CHECK2: omp.critical.exit: // CHECK2-NEXT: ret void @@ -608,15 +703,14 @@ // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 +// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8*, align 4 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** -// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__4(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__kmpc_get_shared_variables_aggregate(i8** [[GLOBAL_ARGS]]) +// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[GLOBAL_ARGS]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.anon.3** +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast %struct.anon.3** [[TMP3]] to %struct.anon.3* +// CHECK2-NEXT: call void @__omp_outlined__4(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], %struct.anon.3* [[TMP4]]) #[[ATTR3]] // CHECK2-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp --- a/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp @@ -37,7 +37,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 // CHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* @@ -51,17 +52,21 @@ // CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK-NEXT: store i32 [[TMP3]], i32* [[D_ON_STACK]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-NEXT: [[TMP5:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* -// CHECK-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 8 -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[D_ON_STACK]] to i8* -// CHECK-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 8 -// CHECK-NEXT: [[TMP8:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, [10 x i32]*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP8]], i64 2) +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP4]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-NEXT: store i32* [[D_ON_STACK]], i32** [[TMP5]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = bitcast %struct.anon* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-NEXT: [[TMP7:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16) +// CHECK-NEXT: [[TMP8:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP6]], i8* [[TMP7]]) +// CHECK-NEXT: [[TMP9:%.*]] = load [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP8]] to %struct.anon* +// CHECK-NEXT: store [[STRUCT_ANON]] [[TMP9]], %struct.anon* [[TMP10]], align 8 +// CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8* [[TMP8]]) +// CHECK-NEXT: call void @__kmpc_free_shared(i8* [[TMP7]], i64 16) // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 3 -// CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX]], align 4 // CHECK-NEXT: call void @__kmpc_free_shared(i8* [[D]], i64 4) // CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -71,12 +76,11 @@ // // // CHECK-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK-NEXT: [[D_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -86,77 +90,79 @@ // CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK-NEXT: store i32* [[D]], i32** [[D_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** [[D_ADDR]], align 8 +// CHECK-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 // CHECK-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK: omp.dispatch.cond: -// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK: cond.true: // CHECK-NEXT: br label [[COND_END:%.*]] // CHECK: cond.false: -// CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-NEXT: br label [[COND_END]] // CHECK: cond.end: -// CHECK-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 -// CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK: omp.dispatch.body: // CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK: omp.inner.for.cond: -// CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK: omp.inner.for.body: -// CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP12]] +// CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i64 0, i64 [[IDXPROM]] +// CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP15]] // CHECK-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 // CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK: omp.body.continue: // CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK: omp.inner.for.inc: -// CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK: omp.inner.for.end: // CHECK-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK: omp.dispatch.inc: -// CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_LB]], align 4 -// CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_UB]], align 4 // CHECK-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK: omp.dispatch.end: -// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]]) // CHECK-NEXT: ret void // // @@ -166,18 +172,14 @@ // CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +// CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8*, align 8 // CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 -// CHECK-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to [10 x i32]** -// CHECK-NEXT: [[TMP5:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP4]], align 8 -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 1 -// CHECK-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32** -// CHECK-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 -// CHECK-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP5]], i32* [[TMP8]]) #[[ATTR3:[0-9]+]] +// CHECK-NEXT: call void @__kmpc_get_shared_variables_aggregate(i8** [[GLOBAL_ARGS]]) +// CHECK-NEXT: [[TMP2:%.*]] = load i8*, i8** [[GLOBAL_ARGS]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.anon** +// CHECK-NEXT: [[TMP4:%.*]] = bitcast %struct.anon** [[TMP3]] to %struct.anon* +// CHECK-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], %struct.anon* [[TMP4]]) #[[ATTR3:[0-9]+]] // CHECK-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_target_codegen.cpp b/clang/test/OpenMP/nvptx_target_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_codegen.cpp @@ -149,7 +149,8 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[PTR1_ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[PTR2_ADDR:%.*]] = alloca i32**, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK1-NEXT: store i32* [[PTR1]], i32** [[PTR1_ADDR]], align 8 // CHECK1-NEXT: store i32** [[PTR2]], i32*** [[PTR2_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32**, i32*** [[PTR2_ADDR]], align 8 @@ -158,14 +159,18 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32** [[PTR1_ADDR]] to i8* -// CHECK1-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i32** [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 2, i32 -1, i8* bitcast (void (i32*, i32*, i32**, i32**)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP7]], i64 2) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32** [[PTR1_ADDR]], i32*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i32** [[TMP0]], i32*** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = bitcast %struct.anon* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK1-NEXT: [[TMP6:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16) +// CHECK1-NEXT: [[TMP7:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP5]], i8* [[TMP6]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK1-NEXT: store [[STRUCT_ANON]] [[TMP8]], %struct.anon* [[TMP9]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 2, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon*)* @__omp_outlined__ to i8*), i8* null, i8* [[TMP7]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP6]], i64 16) // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -173,27 +178,28 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32** nonnull align 8 dereferenceable(8) [[PTR1:%.*]], i32** nonnull align 8 dereferenceable(8) [[PTR2:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[PTR1_ADDR:%.*]] = alloca i32**, align 8 -// CHECK1-NEXT: [[PTR2_ADDR:%.*]] = alloca i32**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32** [[PTR1]], i32*** [[PTR1_ADDR]], align 8 -// CHECK1-NEXT: store i32** [[PTR2]], i32*** [[PTR2_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32**, i32*** [[PTR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[PTR2_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP0]], align 8 -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[TMP4]], align 4 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[TMP2]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[TMP7]], align 4 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39 -// CHECK1-SAME: () #[[ATTR4:[0-9]+]] { +// CHECK1-SAME: () #[[ATTR6:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -206,7 +212,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l47 -// CHECK1-SAME: (i64 [[AA:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i64 [[AA:%.*]]) #[[ATTR6]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 @@ -232,7 +238,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l53 -// CHECK1-SAME: (i64 [[A:%.*]], [10 x float]* nonnull align 4 dereferenceable(40) [[B:%.*]], i64 [[VLA:%.*]], float* nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* nonnull align 8 dereferenceable(400) [[C:%.*]], i64 [[VLA1:%.*]], i64 [[VLA3:%.*]], double* nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i64 [[A:%.*]], [10 x float]* nonnull align 4 dereferenceable(40) [[B:%.*]], i64 [[VLA:%.*]], float* nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* nonnull align 8 dereferenceable(400) [[C:%.*]], i64 [[VLA1:%.*]], i64 [[VLA3:%.*]], double* nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR6]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8 @@ -312,7 +318,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2TTIxcEixEi -// CHECK1-SAME: (%struct.TT* nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 [[I:%.*]]) #[[ATTR5:[0-9]+]] comdat align 2 { +// CHECK1-SAME: (%struct.TT* nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 [[I:%.*]]) #[[ATTR7:[0-9]+]] comdat align 2 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.TT*, align 8 // CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 @@ -324,7 +330,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l90 -// CHECK1-SAME: (i64 [[A:%.*]], i64 [[AA:%.*]], i64 [[AAA:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i64 [[A:%.*]], i64 [[AA:%.*]], i64 [[AAA:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR6]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 @@ -366,7 +372,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l108 -// CHECK1-SAME: (%struct.S1* [[THIS:%.*]], i64 [[B:%.*]], i64 [[VLA:%.*]], i64 [[VLA1:%.*]], i16* nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (%struct.S1* [[THIS:%.*]], i64 [[B:%.*]], i64 [[VLA:%.*]], i64 [[VLA1:%.*]], i16* nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR6]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 @@ -413,31 +419,36 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_Z3baziRd -// CHECK1-SAME: (i32 [[F1:%.*]], double* nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR5]] { +// CHECK1-SAME: (i32 [[F1:%.*]], double* nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR7]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) // CHECK1-NEXT: [[F:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[F_ON_STACK:%.*]] = bitcast i8* [[F]] to i32* // CHECK1-NEXT: store i32 [[F1]], i32* [[F_ON_STACK]], align 4 // CHECK1-NEXT: store double* [[A]], double** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i32* [[F_ON_STACK]] to i8* -// CHECK1-NEXT: store i8* [[TMP3]], i8** [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast double* [[TMP1]] to i8* -// CHECK1-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, double*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP6]], i64 2) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[F_ON_STACK]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[F_ON_STACK]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load double*, double** [[A_ADDR]], align 8 +// CHECK1-NEXT: store double* [[TMP3]], double** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK1-NEXT: [[TMP5:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16) +// CHECK1-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP4]], i8* [[TMP5]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to %struct.anon.0* +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP7]], %struct.anon.0* [[TMP8]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8* [[TMP6]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP5]], i64 16) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[F_ON_STACK]], align 4 // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[F]], i64 4) -// CHECK1-NEXT: ret i32 [[TMP7]] +// CHECK1-NEXT: ret i32 [[TMP9]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142 -// CHECK1-SAME: () #[[ATTR4]] { +// CHECK1-SAME: () #[[ATTR6]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -453,7 +464,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74 -// CHECK1-SAME: (i64 [[A:%.*]], i64 [[AA:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i64 [[A:%.*]], i64 [[AA:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR6]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 @@ -487,47 +498,44 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[F:%.*]], double* nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[F]], i32** [[F_ADDR]], align 8 -// CHECK1-NEXT: store double* [[A]], double** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[F_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[A_ADDR]], align 8 -// CHECK1-NEXT: store double* [[TMP1]], double** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load double, double* [[TMP2]], align 8 -// CHECK1-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP3]] +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 8 +// CHECK1-NEXT: store double* [[TMP4]], double** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load double, double* [[TMP5]], align 8 +// CHECK1-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP6]] // CHECK1-NEXT: [[CONV:%.*]] = fptosi double [[ADD]] to i32 -// CHECK1-NEXT: store i32 [[CONV]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: store i32 [[CONV]], i32* [[TMP2]], align 4 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR8:[0-9]+]] { +// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR9:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 1 -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to double** -// CHECK1-NEXT: [[TMP8:%.*]] = load double*, double** [[TMP7]], align 8 -// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], double* [[TMP8]]) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(i8** [[GLOBAL_ARGS]]) +// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[GLOBAL_ARGS]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.anon.0** +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast %struct.anon.0** [[TMP3]] to %struct.anon.0* +// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], %struct.anon.0* [[TMP4]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: ret void // // @@ -536,7 +544,8 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[PTR1_ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[PTR2_ADDR:%.*]] = alloca i32**, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK2-NEXT: store i32* [[PTR1]], i32** [[PTR1_ADDR]], align 4 // CHECK2-NEXT: store i32** [[PTR2]], i32*** [[PTR2_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load i32**, i32*** [[PTR2_ADDR]], align 4 @@ -545,14 +554,18 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i32** [[PTR1_ADDR]] to i8* -// CHECK2-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i32** [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 2, i32 -1, i8* bitcast (void (i32*, i32*, i32**, i32**)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP7]], i32 2) +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store i32** [[PTR1_ADDR]], i32*** [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store i32** [[TMP0]], i32*** [[TMP4]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = bitcast %struct.anon* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK2-NEXT: [[TMP6:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK2-NEXT: [[TMP7:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP5]], i8* [[TMP6]]) +// CHECK2-NEXT: [[TMP8:%.*]] = load [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK2-NEXT: store [[STRUCT_ANON]] [[TMP8]], %struct.anon* [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 2, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon*)* @__omp_outlined__ to i8*), i8* null, i8* [[TMP7]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[TMP6]], i32 8) // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -560,27 +573,28 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32** nonnull align 4 dereferenceable(4) [[PTR1:%.*]], i32** nonnull align 4 dereferenceable(4) [[PTR2:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[PTR1_ADDR:%.*]] = alloca i32**, align 4 -// CHECK2-NEXT: [[PTR2_ADDR:%.*]] = alloca i32**, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32** [[PTR1]], i32*** [[PTR1_ADDR]], align 4 -// CHECK2-NEXT: store i32** [[PTR2]], i32*** [[PTR2_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32**, i32*** [[PTR1_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[PTR2_ADDR]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP0]], align 4 -// CHECK2-NEXT: store i32 [[TMP3]], i32* [[TMP4]], align 4 +// CHECK2-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[TMP2]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], i32* [[TMP7]], align 4 // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39 -// CHECK2-SAME: () #[[ATTR4:[0-9]+]] { +// CHECK2-SAME: () #[[ATTR6:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -593,7 +607,7 @@ // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l47 -// CHECK2-SAME: (i32 [[AA:%.*]]) #[[ATTR4]] { +// CHECK2-SAME: (i32 [[AA:%.*]]) #[[ATTR6]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 @@ -619,7 +633,7 @@ // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l53 -// CHECK2-SAME: (i32 [[A:%.*]], [10 x float]* nonnull align 4 dereferenceable(40) [[B:%.*]], i32 [[VLA:%.*]], float* nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* nonnull align 8 dereferenceable(400) [[C:%.*]], i32 [[VLA1:%.*]], i32 [[VLA3:%.*]], double* nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR4]] { +// CHECK2-SAME: (i32 [[A:%.*]], [10 x float]* nonnull align 4 dereferenceable(40) [[B:%.*]], i32 [[VLA:%.*]], float* nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* nonnull align 8 dereferenceable(400) [[C:%.*]], i32 [[VLA1:%.*]], i32 [[VLA3:%.*]], double* nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR6]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4 @@ -698,7 +712,7 @@ // // // CHECK2-LABEL: define {{[^@]+}}@_ZN2TTIxcEixEi -// CHECK2-SAME: (%struct.TT* nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 [[I:%.*]]) #[[ATTR5:[0-9]+]] comdat align 2 { +// CHECK2-SAME: (%struct.TT* nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 [[I:%.*]]) #[[ATTR7:[0-9]+]] comdat align 2 { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.TT*, align 4 // CHECK2-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 @@ -710,7 +724,7 @@ // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l90 -// CHECK2-SAME: (i32 [[A:%.*]], i32 [[AA:%.*]], i32 [[AAA:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4]] { +// CHECK2-SAME: (i32 [[A:%.*]], i32 [[AA:%.*]], i32 [[AAA:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR6]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 @@ -751,7 +765,7 @@ // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l108 -// CHECK2-SAME: (%struct.S1* [[THIS:%.*]], i32 [[B:%.*]], i32 [[VLA:%.*]], i32 [[VLA1:%.*]], i16* nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR4]] { +// CHECK2-SAME: (%struct.S1* [[THIS:%.*]], i32 [[B:%.*]], i32 [[VLA:%.*]], i32 [[VLA1:%.*]], i16* nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR6]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 @@ -797,31 +811,36 @@ // // // CHECK2-LABEL: define {{[^@]+}}@_Z3baziRd -// CHECK2-SAME: (i32 [[F1:%.*]], double* nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR5]] { +// CHECK2-SAME: (i32 [[F1:%.*]], double* nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR7]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca double*, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) // CHECK2-NEXT: [[F:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) // CHECK2-NEXT: [[F_ON_STACK:%.*]] = bitcast i8* [[F]] to i32* // CHECK2-NEXT: store i32 [[F1]], i32* [[F_ON_STACK]], align 4 // CHECK2-NEXT: store double* [[A]], double** [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load double*, double** [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i32* [[F_ON_STACK]] to i8* -// CHECK2-NEXT: store i8* [[TMP3]], i8** [[TMP2]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP5:%.*]] = bitcast double* [[TMP1]] to i8* -// CHECK2-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, double*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP6]], i32 2) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[F_ON_STACK]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store i32* [[F_ON_STACK]], i32** [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP3:%.*]] = load double*, double** [[A_ADDR]], align 4 +// CHECK2-NEXT: store double* [[TMP3]], double** [[TMP2]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK2-NEXT: [[TMP5:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK2-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP4]], i8* [[TMP5]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to %struct.anon.0* +// CHECK2-NEXT: store [[STRUCT_ANON_0]] [[TMP7]], %struct.anon.0* [[TMP8]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8* [[TMP6]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[TMP5]], i32 8) +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[F_ON_STACK]], align 4 // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[F]], i32 4) -// CHECK2-NEXT: ret i32 [[TMP7]] +// CHECK2-NEXT: ret i32 [[TMP9]] // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142 -// CHECK2-SAME: () #[[ATTR4]] { +// CHECK2-SAME: () #[[ATTR6]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -837,7 +856,7 @@ // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74 -// CHECK2-SAME: (i32 [[A:%.*]], i32 [[AA:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4]] { +// CHECK2-SAME: (i32 [[A:%.*]], i32 [[AA:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR6]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 @@ -870,46 +889,43 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[F:%.*]], double* nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca double*, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca double*, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32* [[F]], i32** [[F_ADDR]], align 4 -// CHECK2-NEXT: store double* [[A]], double** [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[F_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load double*, double** [[A_ADDR]], align 4 -// CHECK2-NEXT: store double* [[TMP1]], double** [[TMP]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load double, double* [[TMP2]], align 8 -// CHECK2-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP3]] +// CHECK2-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 4 +// CHECK2-NEXT: store double* [[TMP4]], double** [[TMP]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load double*, double** [[TMP]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load double, double* [[TMP5]], align 8 +// CHECK2-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP6]] // CHECK2-NEXT: [[CONV:%.*]] = fptosi double [[ADD]] to i32 -// CHECK2-NEXT: store i32 [[CONV]], i32* [[TMP0]], align 4 +// CHECK2-NEXT: store i32 [[CONV]], i32* [[TMP2]], align 4 // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR8:[0-9]+]] { +// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR9:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 +// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8*, align 4 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** -// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 1 -// CHECK2-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to double** -// CHECK2-NEXT: [[TMP8:%.*]] = load double*, double** [[TMP7]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], double* [[TMP8]]) #[[ATTR2:[0-9]+]] +// CHECK2-NEXT: call void @__kmpc_get_shared_variables_aggregate(i8** [[GLOBAL_ARGS]]) +// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[GLOBAL_ARGS]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.anon.0** +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast %struct.anon.0** [[TMP3]] to %struct.anon.0* +// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], %struct.anon.0* [[TMP4]]) #[[ATTR3:[0-9]+]] // CHECK2-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp @@ -55,7 +55,8 @@ // CHECK1-SAME: (i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK1-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) @@ -63,11 +64,16 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i16* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP5]], i64 1) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i16* [[TMP0]], i16** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast %struct.anon* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK1-NEXT: [[TMP5:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP4]], i8* [[TMP5]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to %struct.anon* +// CHECK1-NEXT: store [[STRUCT_ANON]] [[TMP7]], %struct.anon* [[TMP8]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon*)* @__omp_outlined__ to i8*), i8* null, i8* [[TMP6]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP5]], i64 8) // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -75,20 +81,22 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[TMP0]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16*, i16** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP2]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV1]], i16* [[TMP0]], align 2 +// CHECK1-NEXT: store i16 [[CONV1]], i16* [[TMP2]], align 2 // CHECK1-NEXT: ret void // // @@ -98,7 +106,8 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK1-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 8 // CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 @@ -110,17 +119,20 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP1]] to i8* -// CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP11]], i64 3) +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[TMP0]], i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i16* [[TMP1]], i16** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK1-NEXT: [[TMP9:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 24) +// CHECK1-NEXT: [[TMP10:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP8]], i8* [[TMP9]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to %struct.anon.0* +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP11]], %struct.anon.0* [[TMP12]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__1 to i8*), i8* null, i8* [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP9]], i64 24) // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -128,32 +140,32 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 8 -// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[TMP4]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK1-NEXT: store i16 [[CONV2]], i16* [[TMP1]], align 2 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK1-NEXT: store i16 [[CONV2]], i16* [[TMP4]], align 2 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: ret void // @@ -162,7 +174,8 @@ // CHECK2-SAME: (i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK2-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) @@ -170,11 +183,16 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i16* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP5]], i32 1) +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store i16* [[TMP0]], i16** [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast %struct.anon* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK2-NEXT: [[TMP5:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP4]], i8* [[TMP5]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to %struct.anon* +// CHECK2-NEXT: store [[STRUCT_ANON]] [[TMP7]], %struct.anon* [[TMP8]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon*)* @__omp_outlined__ to i8*), i8* null, i8* [[TMP6]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[TMP5]], i32 4) // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -182,20 +200,22 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[TMP0]], align 2 -// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK2-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16*, i16** [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP2]], align 2 +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK2-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK2-NEXT: store i16 [[CONV1]], i16* [[TMP0]], align 2 +// CHECK2-NEXT: store i16 [[CONV1]], i16* [[TMP2]], align 2 // CHECK2-NEXT: ret void // // @@ -205,7 +225,8 @@ // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK2-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 // CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 @@ -217,17 +238,20 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP1]] to i8* -// CHECK2-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK2-NEXT: [[TMP10:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP11]], i32 3) +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store i32* [[TMP0]], i32** [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store i16* [[TMP1]], i16** [[TMP6]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK2-NEXT: [[TMP9:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 12) +// CHECK2-NEXT: [[TMP10:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP8]], i8* [[TMP9]]) +// CHECK2-NEXT: [[TMP11:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to %struct.anon.0* +// CHECK2-NEXT: store [[STRUCT_ANON_0]] [[TMP11]], %struct.anon.0* [[TMP12]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__1 to i8*), i8* null, i8* [[TMP10]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[TMP9]], i32 12) // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -235,32 +259,32 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK2-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 -// CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK2-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP2]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i16, i16* [[TMP4]], align 2 +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK2-NEXT: store i16 [[CONV2]], i16* [[TMP1]], align 2 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i32 0, i32 2 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK2-NEXT: store i16 [[CONV2]], i16* [[TMP4]], align 2 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK2-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 // CHECK2-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp @@ -50,7 +50,8 @@ // CHECK1-SAME: (i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK1-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) @@ -58,11 +59,16 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i16* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP5]], i64 1) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i16* [[TMP0]], i16** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast %struct.anon* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK1-NEXT: [[TMP5:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP4]], i8* [[TMP5]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to %struct.anon* +// CHECK1-NEXT: store [[STRUCT_ANON]] [[TMP7]], %struct.anon* [[TMP8]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon*)* @__omp_outlined__ to i8*), i8* null, i8* [[TMP6]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP5]], i64 8) // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -70,20 +76,22 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[TMP0]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16*, i16** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP2]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV1]], i16* [[TMP0]], align 2 +// CHECK1-NEXT: store i16 [[CONV1]], i16* [[TMP2]], align 2 // CHECK1-NEXT: ret void // // @@ -94,7 +102,8 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK1-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 8 // CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 @@ -109,17 +118,20 @@ // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP1]] to i8* -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP12]], i64 3) +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[TMP0]], i32** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i16* [[TMP1]], i16** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK1-NEXT: [[TMP10:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 24) +// CHECK1-NEXT: [[TMP11:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP9]], i8* [[TMP10]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP11]] to %struct.anon.0* +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP12]], %struct.anon.0* [[TMP13]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__1 to i8*), i8* null, i8* [[TMP11]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP10]], i64 24) // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -127,32 +139,32 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 8 -// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[TMP4]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK1-NEXT: store i16 [[CONV2]], i16* [[TMP1]], align 2 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK1-NEXT: store i16 [[CONV2]], i16* [[TMP4]], align 2 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: ret void // @@ -161,7 +173,8 @@ // CHECK2-SAME: (i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK2-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) @@ -169,11 +182,16 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i16* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP5]], i32 1) +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store i16* [[TMP0]], i16** [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast %struct.anon* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK2-NEXT: [[TMP5:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP4]], i8* [[TMP5]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to %struct.anon* +// CHECK2-NEXT: store [[STRUCT_ANON]] [[TMP7]], %struct.anon* [[TMP8]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon*)* @__omp_outlined__ to i8*), i8* null, i8* [[TMP6]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[TMP5]], i32 4) // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -181,20 +199,22 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[TMP0]], align 2 -// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK2-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16*, i16** [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP2]], align 2 +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK2-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK2-NEXT: store i16 [[CONV1]], i16* [[TMP0]], align 2 +// CHECK2-NEXT: store i16 [[CONV1]], i16* [[TMP2]], align 2 // CHECK2-NEXT: ret void // // @@ -205,7 +225,8 @@ // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK2-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 // CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 @@ -219,17 +240,20 @@ // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP1]] to i8* -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK2-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* -// CHECK2-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP12]], i32 3) +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store i32* [[TMP0]], i32** [[TMP6]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store i16* [[TMP1]], i16** [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK2-NEXT: [[TMP10:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 12) +// CHECK2-NEXT: [[TMP11:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP9]], i8* [[TMP10]]) +// CHECK2-NEXT: [[TMP12:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP11]] to %struct.anon.0* +// CHECK2-NEXT: store [[STRUCT_ANON_0]] [[TMP12]], %struct.anon.0* [[TMP13]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__1 to i8*), i8* null, i8* [[TMP11]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[TMP10]], i32 12) // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -237,32 +261,32 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK2-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 -// CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK2-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP2]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i16, i16* [[TMP4]], align 2 +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK2-NEXT: store i16 [[CONV2]], i16* [[TMP1]], align 2 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i32 0, i32 2 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK2-NEXT: store i16 [[CONV2]], i16* [[TMP4]], align 2 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK2-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 // CHECK2-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp --- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp @@ -33,6 +33,7 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16 // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) @@ -42,7 +43,7 @@ // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8:![0-9]+]] -// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR6:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR6:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -50,10 +51,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -63,113 +65,119 @@ // CHECK1-NEXT: [[IB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[REF_TMP2:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12:![0-9]+]] // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: [[ISTART:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[ISTART_ON_STACK:%.*]] = bitcast i8* [[ISTART]] to i32* // CHECK1-NEXT: [[IEND:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[IEND_ON_STACK:%.*]] = bitcast i8* [[IEND]] to i32* // CHECK1-NEXT: [[PARTIAL_SUM:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 8) // CHECK1-NEXT: [[PARTIAL_SUM_ON_STACK:%.*]] = bitcast i8* [[PARTIAL_SUM]] to %"class.std::complex"* -// CHECK1-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP1:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* +// CHECK1-NEXT: [[TMP1:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP1]]) #[[ATTR6]] -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* +// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP2]]) #[[ATTR6]] -// CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR6]] -// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* +// CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR6]] -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i32* [[IB]] to i8* +// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP5]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 99 +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i32* [[IB]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[IB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast float* [[REF_TMP]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP14]]) #[[ATTR6]] -// CHECK1-NEXT: store float 0.000000e+00, float* [[REF_TMP]], align 4, !tbaa [[TBAA14:![0-9]+]] -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast float* [[REF_TMP2]] to i8* +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast float* [[REF_TMP]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP15]]) #[[ATTR6]] -// CHECK1-NEXT: store float 0.000000e+00, float* [[REF_TMP2]], align 4, !tbaa [[TBAA14]] -// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM_ON_STACK]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR12:[0-9]+]] +// CHECK1-NEXT: store float 0.000000e+00, float* [[REF_TMP]], align 4, !tbaa [[TBAA14:![0-9]+]] // CHECK1-NEXT: [[TMP16:%.*]] = bitcast float* [[REF_TMP2]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast float* [[REF_TMP]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR6]] +// CHECK1-NEXT: store float 0.000000e+00, float* [[REF_TMP2]], align 4, !tbaa [[TBAA14]] +// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM_ON_STACK]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR11:[0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast float* [[REF_TMP2]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP18]], 4 -// CHECK1-NEXT: store i32 [[MUL3]], i32* [[ISTART_ON_STACK]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast float* [[REF_TMP]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP18]]) #[[ATTR6]] // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP19]], 4 +// CHECK1-NEXT: store i32 [[MUL3]], i32* [[ISTART_ON_STACK]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[ADD4]], 4 // CHECK1-NEXT: store i32 [[MUL5]], i32* [[IEND_ON_STACK]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[ISTART_ON_STACK]] to i8* -// CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i32* [[IEND_ON_STACK]] to i8* -// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP25:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM_ON_STACK]] to i8* -// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP26:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i32*, %"class.std::complex"*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP26]], i64 3) +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[ISTART_ON_STACK]], i32** [[TMP21]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[IEND_ON_STACK]], i32** [[TMP22]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %"class.std::complex"* [[PARTIAL_SUM_ON_STACK]], %"class.std::complex"** [[TMP23]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP24:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK1-NEXT: [[TMP25:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 24) +// CHECK1-NEXT: [[TMP26:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP24]], i8* [[TMP25]]) +// CHECK1-NEXT: [[TMP27:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = bitcast i8* [[TMP26]] to %struct.anon.0* +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP27]], %struct.anon.0* [[TMP28]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8* [[TMP26]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP25]], i64 24) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]]) -// CHECK1-NEXT: [[TMP28:%.*]] = bitcast i32* [[IB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP28]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP29:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP29]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP30:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP30:%.*]] = bitcast i32* [[IB]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP30]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP31:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* +// CHECK1-NEXT: [[TMP31:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP31]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP32:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* +// CHECK1-NEXT: [[TMP32:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP32]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP33:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP34]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP35:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP35]]) #[[ATTR6]] // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[PARTIAL_SUM]], i64 8) // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[IEND]], i64 4) // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[ISTART]], i64 4) @@ -188,18 +196,16 @@ // CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load float*, float** [[__RE_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load float*, float** [[__IM_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR12]] +// CHECK1-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR11]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ISTART:%.*]], i32* nonnull align 4 dereferenceable(4) [[IEND:%.*]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ISTART_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[IEND_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[PARTIAL_SUM_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -210,204 +216,206 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[PARTIAL_SUM5:%.*]] = alloca %"class.std::complex", align 4 +// CHECK1-NEXT: [[PARTIAL_SUM:%.*]] = alloca %"class.std::complex", align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[REF_TMP6:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[I7:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP14:%.*]] = alloca %"class.std::complex", align 4 +// CHECK1-NEXT: [[REF_TMP5:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[REF_TMP13:%.*]] = alloca %"class.std::complex", align 4 +// CHECK1-NEXT: [[REF_TMP14:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[REF_TMP15:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[REF_TMP16:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: store i32* [[ISTART]], i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: store i32* [[IEND]], i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: store %"class.std::complex"* [[PARTIAL_SUM]], %"class.std::complex"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP2:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[TMP5]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP7]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP10]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP4]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP12]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast i32* [[I]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP11]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP12]], i32* [[I]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast i32* [[I]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP13]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i32* [[I]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP15]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP16]], i32* [[I]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i32* [[I]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR6]] // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP19:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP21]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP23]]) #[[ATTR6]] // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP24:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP24]]) #[[ATTR6]] // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP21:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP21]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP22:%.*]] = bitcast float* [[REF_TMP]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP22]]) #[[ATTR6]] -// CHECK1-NEXT: store float 0.000000e+00, float* [[REF_TMP]], align 4, !tbaa [[TBAA14]] -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast float* [[REF_TMP6]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP23]]) #[[ATTR6]] -// CHECK1-NEXT: store float 0.000000e+00, float* [[REF_TMP6]], align 4, !tbaa [[TBAA14]] -// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR12]] -// CHECK1-NEXT: [[TMP24:%.*]] = bitcast float* [[REF_TMP6]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP24]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP25:%.*]] = bitcast float* [[REF_TMP]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP25]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i32* [[I7]] to i8* +// CHECK1-NEXT: [[TMP25:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP25]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast float* [[REF_TMP]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP26]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP28]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: store float 0.000000e+00, float* [[REF_TMP]], align 4, !tbaa [[TBAA14]] +// CHECK1-NEXT: [[TMP27:%.*]] = bitcast float* [[REF_TMP5]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP27]]) #[[ATTR6]] +// CHECK1-NEXT: store float 0.000000e+00, float* [[REF_TMP5]], align 4, !tbaa [[TBAA14]] +// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP5]]) #[[ATTR11]] +// CHECK1-NEXT: [[TMP28:%.*]] = bitcast float* [[REF_TMP5]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP28]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP29:%.*]] = bitcast float* [[REF_TMP]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP29]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP30:%.*]] = bitcast i32* [[I6]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP30]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP32]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[TMP29]], [[TMP30]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CMP7:%.*]] = icmp ugt i32 [[TMP33]], [[TMP34]] +// CHECK1-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE]] ], [ [[TMP32]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP35]], [[COND_TRUE]] ], [ [[TMP36]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP35]], 1 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp ult i32 [[TMP34]], [[ADD9]] -// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP37]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD8:%.*]] = add i32 [[TMP39]], 1 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp ult i32 [[TMP38]], [[ADD8]] +// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] // CHECK1: omp.dispatch.cleanup: // CHECK1-NEXT: br label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP37]], 1 -// CHECK1-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP36]], [[ADD11]] -// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD10:%.*]] = add i32 [[TMP41]], 1 +// CHECK1-NEXT: [[CMP11:%.*]] = icmp ult i32 [[TMP40]], [[ADD10]] +// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP39]], 1 -// CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP38]], [[MUL]] -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[I7]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP40:%.*]] = bitcast %"class.std::complex"* [[REF_TMP14]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP40]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP41:%.*]] = bitcast float* [[REF_TMP15]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP41]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP42]] to float -// CHECK1-NEXT: store float [[CONV]], float* [[REF_TMP15]], align 4, !tbaa [[TBAA14]] -// CHECK1-NEXT: [[TMP43:%.*]] = bitcast float* [[REF_TMP16]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP43]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to float -// CHECK1-NEXT: store float [[CONV17]], float* [[REF_TMP16]], align 4, !tbaa [[TBAA14]] -// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]], float* nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR12]] -// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR12]] -// CHECK1-NEXT: [[TMP45:%.*]] = bitcast float* [[REF_TMP16]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP45]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP46:%.*]] = bitcast float* [[REF_TMP15]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP46]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex"* [[REF_TMP14]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP47]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP43]], 1 +// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[TMP42]], [[MUL]] +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[I6]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP44:%.*]] = bitcast %"class.std::complex"* [[REF_TMP13]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP44]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP45:%.*]] = bitcast float* [[REF_TMP14]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP45]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[I6]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP46]] to float +// CHECK1-NEXT: store float [[CONV]], float* [[REF_TMP14]], align 4, !tbaa [[TBAA14]] +// CHECK1-NEXT: [[TMP47:%.*]] = bitcast float* [[REF_TMP15]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP47]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[I6]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CONV16:%.*]] = sitofp i32 [[TMP48]] to float +// CHECK1-NEXT: store float [[CONV16]], float* [[REF_TMP15]], align 4, !tbaa [[TBAA14]] +// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP13]], float* nonnull align 4 dereferenceable(4) [[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]]) #[[ATTR11]] +// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP13]]) #[[ATTR11]] +// CHECK1-NEXT: [[TMP49:%.*]] = bitcast float* [[REF_TMP15]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP49]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP50:%.*]] = bitcast float* [[REF_TMP14]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP50]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP51:%.*]] = bitcast %"class.std::complex"* [[REF_TMP13]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP51]]) #[[ATTR6]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP48]], 1 -// CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD17:%.*]] = add i32 [[TMP52]], 1 +// CHECK1-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD19:%.*]] = add i32 [[TMP49]], [[TMP50]] -// CHECK1-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP51]], [[TMP52]] -// CHECK1-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP53]], [[TMP54]] +// CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP56:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD19:%.*]] = add i32 [[TMP55]], [[TMP56]] +// CHECK1-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP54]]) -// CHECK1-NEXT: [[TMP55:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP58:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8* -// CHECK1-NEXT: store i8* [[TMP58]], i8** [[TMP57]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP60:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP56]], i32 1, i64 8, i8* [[TMP59]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func) -// CHECK1-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1 -// CHECK1-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK1-NEXT: [[TMP57:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = load i32, i32* [[TMP57]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP58]]) +// CHECK1-NEXT: [[TMP59:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load i32, i32* [[TMP59]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP62:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM]] to i8* +// CHECK1-NEXT: store i8* [[TMP62]], i8** [[TMP61]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP64:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP60]], i32 1, i64 8, i8* [[TMP63]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func) +// CHECK1-NEXT: [[TMP65:%.*]] = icmp eq i32 [[TMP64]], 1 +// CHECK1-NEXT: br i1 [[TMP65]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR12]] -// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]]) +// CHECK1-NEXT: [[CALL20:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP6]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM]]) #[[ATTR11]] +// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP60]]) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK1: .omp.reduction.done: -// CHECK1-NEXT: [[TMP62:%.*]] = bitcast i32* [[I7]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP63:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP63]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP64:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP65:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP66:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* +// CHECK1-NEXT: [[TMP66:%.*]] = bitcast i32* [[I6]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP67:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR6]] -// CHECK1-NEXT: br label [[OMP_PRECOND_END]] -// CHECK1: omp.precond.end: -// CHECK1-NEXT: [[TMP68:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* +// CHECK1-NEXT: [[TMP67:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP67]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP68:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP69:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* +// CHECK1-NEXT: [[TMP69:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP70:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* +// CHECK1-NEXT: [[TMP70:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP71:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* +// CHECK1-NEXT: [[TMP71:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR6]] +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.end: +// CHECK1-NEXT: [[TMP72:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP72]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP73:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP73]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP74:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP74]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP75:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP75]]) #[[ATTR6]] // CHECK1-NEXT: ret void // // @@ -420,13 +428,13 @@ // CHECK1-NEXT: store %"class.std::complex"* [[__C]], %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR12]] +// CHECK1-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR11]] // CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA16:![0-9]+]] // CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[CALL]] // CHECK1-NEXT: store float [[ADD]], float* [[__RE_]], align 4, !tbaa [[TBAA16]] // CHECK1-NEXT: [[TMP2:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR12]] +// CHECK1-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR11]] // CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1 // CHECK1-NEXT: [[TMP3:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA18:![0-9]+]] // CHECK1-NEXT: [[ADD3:%.*]] = fadd float [[TMP3]], [[CALL2]] @@ -435,7 +443,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func -// CHECK1-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR7:[0-9]+]] { +// CHECK1-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 @@ -512,7 +520,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func -// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -573,33 +581,27 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA19]] // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 1 -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32** -// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 2 -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %"class.std::complex"** -// CHECK1-NEXT: [[TMP11:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[TMP10]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]], %"class.std::complex"* [[TMP11]]) #[[ATTR6]] +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(i8** [[GLOBAL_ARGS]]) +// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[GLOBAL_ARGS]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.anon.0** +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast %struct.anon.0** [[TMP3]] to %struct.anon.0* +// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], %struct.anon.0* [[TMP4]]) #[[ATTR6]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16 // CHECK1-SAME: () #[[ATTR0]] { // CHECK1-NEXT: entry: +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) @@ -609,7 +611,7 @@ // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR6]] +// CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR6]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -617,10 +619,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -630,113 +633,119 @@ // CHECK1-NEXT: [[IB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[REF_TMP2:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_3]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: [[ISTART:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[ISTART_ON_STACK:%.*]] = bitcast i8* [[ISTART]] to i32* // CHECK1-NEXT: [[IEND:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[IEND_ON_STACK:%.*]] = bitcast i8* [[IEND]] to i32* // CHECK1-NEXT: [[PARTIAL_SUM:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 16) -// CHECK1-NEXT: [[PARTIAL_SUM_ON_STACK:%.*]] = bitcast i8* [[PARTIAL_SUM]] to %"class.std::complex.0"* -// CHECK1-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP1:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* +// CHECK1-NEXT: [[PARTIAL_SUM_ON_STACK:%.*]] = bitcast i8* [[PARTIAL_SUM]] to %"class.std::complex.2"* +// CHECK1-NEXT: [[TMP1:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP1]]) #[[ATTR6]] -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* +// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP2]]) #[[ATTR6]] -// CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR6]] -// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* +// CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR6]] -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i32* [[IB]] to i8* +// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP5]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 99 +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i32* [[IB]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[IB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast double* [[REF_TMP]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP14]]) #[[ATTR6]] -// CHECK1-NEXT: store double 0.000000e+00, double* [[REF_TMP]], align 8, !tbaa [[TBAA22:![0-9]+]] -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast double* [[REF_TMP2]] to i8* +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast double* [[REF_TMP]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP15]]) #[[ATTR6]] -// CHECK1-NEXT: store double 0.000000e+00, double* [[REF_TMP2]], align 8, !tbaa [[TBAA22]] -// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM_ON_STACK]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR12]] +// CHECK1-NEXT: store double 0.000000e+00, double* [[REF_TMP]], align 8, !tbaa [[TBAA22:![0-9]+]] // CHECK1-NEXT: [[TMP16:%.*]] = bitcast double* [[REF_TMP2]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP16]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast double* [[REF_TMP]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP16]]) #[[ATTR6]] +// CHECK1-NEXT: store double 0.000000e+00, double* [[REF_TMP2]], align 8, !tbaa [[TBAA22]] +// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.2"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM_ON_STACK]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR11]] +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast double* [[REF_TMP2]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP17]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP18]], 4 -// CHECK1-NEXT: store i32 [[MUL3]], i32* [[ISTART_ON_STACK]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast double* [[REF_TMP]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP18]]) #[[ATTR6]] // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP19]], 4 +// CHECK1-NEXT: store i32 [[MUL3]], i32* [[ISTART_ON_STACK]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[ADD4]], 4 // CHECK1-NEXT: store i32 [[MUL5]], i32* [[IEND_ON_STACK]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[ISTART_ON_STACK]] to i8* -// CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i32* [[IEND_ON_STACK]] to i8* -// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP25:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM_ON_STACK]] to i8* -// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP26:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i32*, %"class.std::complex.0"*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP26]], i64 3) +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[ISTART_ON_STACK]], i32** [[TMP21]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[IEND_ON_STACK]], i32** [[TMP22]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %"class.std::complex.2"* [[PARTIAL_SUM_ON_STACK]], %"class.std::complex.2"** [[TMP23]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP24:%.*]] = bitcast %struct.anon.3* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK1-NEXT: [[TMP25:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 24) +// CHECK1-NEXT: [[TMP26:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP24]], i8* [[TMP25]]) +// CHECK1-NEXT: [[TMP27:%.*]] = load [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = bitcast i8* [[TMP26]] to %struct.anon.3* +// CHECK1-NEXT: store [[STRUCT_ANON_3]] [[TMP27]], %struct.anon.3* [[TMP28]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.3*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8* [[TMP26]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP25]], i64 24) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]]) -// CHECK1-NEXT: [[TMP28:%.*]] = bitcast i32* [[IB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP28]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP29:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP29]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP30:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP30:%.*]] = bitcast i32* [[IB]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP30]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP31:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* +// CHECK1-NEXT: [[TMP31:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP31]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP32:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* +// CHECK1-NEXT: [[TMP32:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP32]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP33:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP34]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP35:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP35]]) #[[ATTR6]] // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[PARTIAL_SUM]], i64 16) // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[IEND]], i64 4) // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[ISTART]], i64 4) @@ -744,29 +753,27 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIdEC1ERKdS2_ -// CHECK1-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]], double* nonnull align 8 dereferenceable(8) [[__RE:%.*]], double* nonnull align 8 dereferenceable(8) [[__IM:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { +// CHECK1-SAME: (%"class.std::complex.2"* nonnull align 8 dereferenceable(16) [[THIS:%.*]], double* nonnull align 8 dereferenceable(8) [[__RE:%.*]], double* nonnull align 8 dereferenceable(8) [[__IM:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.2"*, align 8 // CHECK1-NEXT: [[__RE_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[__IM_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store %"class.std::complex.2"* [[THIS]], %"class.std::complex.2"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: store double* [[__RE]], double** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: store double* [[__IM]], double** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.2"*, %"class.std::complex.2"** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[__RE_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[__IM_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS1]], double* nonnull align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR12]] +// CHECK1-NEXT: call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.2"* nonnull align 8 dereferenceable(16) [[THIS1]], double* nonnull align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR11]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ISTART:%.*]], i32* nonnull align 4 dereferenceable(4) [[IEND:%.*]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ISTART_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[IEND_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[PARTIAL_SUM_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -777,239 +784,241 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[PARTIAL_SUM5:%.*]] = alloca %"class.std::complex.0", align 8 +// CHECK1-NEXT: [[PARTIAL_SUM:%.*]] = alloca %"class.std::complex.2", align 8 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[REF_TMP6:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[I7:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP14:%.*]] = alloca %"class.std::complex.0", align 8 +// CHECK1-NEXT: [[REF_TMP5:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[REF_TMP13:%.*]] = alloca %"class.std::complex.2", align 8 +// CHECK1-NEXT: [[REF_TMP14:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[REF_TMP15:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[REF_TMP16:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: store i32* [[ISTART]], i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: store i32* [[IEND]], i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: store %"class.std::complex.0"* [[PARTIAL_SUM]], %"class.std::complex.0"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP2:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %"class.std::complex.2"*, %"class.std::complex.2"** [[TMP5]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP7]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP10]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP4]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP12]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast i32* [[I]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP11]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP12]], i32* [[I]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast i32* [[I]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP13]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i32* [[I]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP15]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP16]], i32* [[I]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i32* [[I]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR6]] // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP19:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP21]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP23]]) #[[ATTR6]] // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP24:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP24]]) #[[ATTR6]] // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP21:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM5]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP21]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP22:%.*]] = bitcast double* [[REF_TMP]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP22]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP25:%.*]] = bitcast %"class.std::complex.2"* [[PARTIAL_SUM]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP25]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast double* [[REF_TMP]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP26]]) #[[ATTR6]] // CHECK1-NEXT: store double 0.000000e+00, double* [[REF_TMP]], align 8, !tbaa [[TBAA22]] -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast double* [[REF_TMP6]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP23]]) #[[ATTR6]] -// CHECK1-NEXT: store double 0.000000e+00, double* [[REF_TMP6]], align 8, !tbaa [[TBAA22]] -// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR12]] -// CHECK1-NEXT: [[TMP24:%.*]] = bitcast double* [[REF_TMP6]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP24]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP25:%.*]] = bitcast double* [[REF_TMP]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP25]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i32* [[I7]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP26]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB3]], i32 [[TMP28]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP27:%.*]] = bitcast double* [[REF_TMP5]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP27]]) #[[ATTR6]] +// CHECK1-NEXT: store double 0.000000e+00, double* [[REF_TMP5]], align 8, !tbaa [[TBAA22]] +// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.2"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP5]]) #[[ATTR11]] +// CHECK1-NEXT: [[TMP28:%.*]] = bitcast double* [[REF_TMP5]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP28]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP29:%.*]] = bitcast double* [[REF_TMP]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP29]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP30:%.*]] = bitcast i32* [[I6]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP30]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB3]], i32 [[TMP32]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[TMP29]], [[TMP30]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CMP7:%.*]] = icmp ugt i32 [[TMP33]], [[TMP34]] +// CHECK1-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE]] ], [ [[TMP32]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP35]], [[COND_TRUE]] ], [ [[TMP36]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP35]], 1 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp ult i32 [[TMP34]], [[ADD9]] -// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP37]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD8:%.*]] = add i32 [[TMP39]], 1 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp ult i32 [[TMP38]], [[ADD8]] +// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] // CHECK1: omp.dispatch.cleanup: // CHECK1-NEXT: br label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP37]], 1 -// CHECK1-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP36]], [[ADD11]] -// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD10:%.*]] = add i32 [[TMP41]], 1 +// CHECK1-NEXT: [[CMP11:%.*]] = icmp ult i32 [[TMP40]], [[ADD10]] +// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP39]], 1 -// CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP38]], [[MUL]] -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[I7]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP40:%.*]] = bitcast %"class.std::complex.0"* [[REF_TMP14]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP40]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP41:%.*]] = bitcast double* [[REF_TMP15]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP41]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP42]] to double -// CHECK1-NEXT: store double [[CONV]], double* [[REF_TMP15]], align 8, !tbaa [[TBAA22]] -// CHECK1-NEXT: [[TMP43:%.*]] = bitcast double* [[REF_TMP16]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP43]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to double -// CHECK1-NEXT: store double [[CONV17]], double* [[REF_TMP16]], align 8, !tbaa [[TBAA22]] -// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]], double* nonnull align 8 dereferenceable(8) [[REF_TMP15]], double* nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR12]] -// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR12]] -// CHECK1-NEXT: [[TMP45:%.*]] = bitcast double* [[REF_TMP16]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP45]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP46:%.*]] = bitcast double* [[REF_TMP15]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP46]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex.0"* [[REF_TMP14]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP47]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP43]], 1 +// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[TMP42]], [[MUL]] +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[I6]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP44:%.*]] = bitcast %"class.std::complex.2"* [[REF_TMP13]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP44]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP45:%.*]] = bitcast double* [[REF_TMP14]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP45]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[I6]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP46]] to double +// CHECK1-NEXT: store double [[CONV]], double* [[REF_TMP14]], align 8, !tbaa [[TBAA22]] +// CHECK1-NEXT: [[TMP47:%.*]] = bitcast double* [[REF_TMP15]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP47]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[I6]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CONV16:%.*]] = sitofp i32 [[TMP48]] to double +// CHECK1-NEXT: store double [[CONV16]], double* [[REF_TMP15]], align 8, !tbaa [[TBAA22]] +// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.2"* nonnull align 8 dereferenceable(16) [[REF_TMP13]], double* nonnull align 8 dereferenceable(8) [[REF_TMP14]], double* nonnull align 8 dereferenceable(8) [[REF_TMP15]]) #[[ATTR11]] +// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.2"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.2"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM]], %"class.std::complex.2"* nonnull align 8 dereferenceable(16) [[REF_TMP13]]) #[[ATTR11]] +// CHECK1-NEXT: [[TMP49:%.*]] = bitcast double* [[REF_TMP15]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP49]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP50:%.*]] = bitcast double* [[REF_TMP14]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP50]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP51:%.*]] = bitcast %"class.std::complex.2"* [[REF_TMP13]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP51]]) #[[ATTR6]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP48]], 1 -// CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD17:%.*]] = add i32 [[TMP52]], 1 +// CHECK1-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD19:%.*]] = add i32 [[TMP49]], [[TMP50]] -// CHECK1-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP51]], [[TMP52]] -// CHECK1-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP53]], [[TMP54]] +// CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP56:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD19:%.*]] = add i32 [[TMP55]], [[TMP56]] +// CHECK1-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP54]]) -// CHECK1-NEXT: [[TMP55:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP58:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM5]] to i8* -// CHECK1-NEXT: store i8* [[TMP58]], i8** [[TMP57]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP60:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP56]], i32 1, i64 8, i8* [[TMP59]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func5, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func6) -// CHECK1-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1 -// CHECK1-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK1-NEXT: [[TMP57:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = load i32, i32* [[TMP57]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP58]]) +// CHECK1-NEXT: [[TMP59:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load i32, i32* [[TMP59]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP62:%.*]] = bitcast %"class.std::complex.2"* [[PARTIAL_SUM]] to i8* +// CHECK1-NEXT: store i8* [[TMP62]], i8** [[TMP61]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP64:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP60]], i32 1, i64 8, i8* [[TMP63]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func5, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func6) +// CHECK1-NEXT: [[TMP65:%.*]] = icmp eq i32 [[TMP64]], 1 +// CHECK1-NEXT: br i1 [[TMP65]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR12]] -// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]]) +// CHECK1-NEXT: [[CALL20:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.2"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.2"* nonnull align 8 dereferenceable(16) [[TMP6]], %"class.std::complex.2"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM]]) #[[ATTR11]] +// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP60]]) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK1: .omp.reduction.done: -// CHECK1-NEXT: [[TMP62:%.*]] = bitcast i32* [[I7]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP63:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM5]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP63]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP64:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP65:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP66:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* +// CHECK1-NEXT: [[TMP66:%.*]] = bitcast i32* [[I6]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP67:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR6]] -// CHECK1-NEXT: br label [[OMP_PRECOND_END]] -// CHECK1: omp.precond.end: -// CHECK1-NEXT: [[TMP68:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* +// CHECK1-NEXT: [[TMP67:%.*]] = bitcast %"class.std::complex.2"* [[PARTIAL_SUM]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP67]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP68:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP69:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* +// CHECK1-NEXT: [[TMP69:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP70:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* +// CHECK1-NEXT: [[TMP70:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP71:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* +// CHECK1-NEXT: [[TMP71:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR6]] +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.end: +// CHECK1-NEXT: [[TMP72:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP72]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP73:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP73]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP74:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP74]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP75:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP75]]) #[[ATTR6]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIdEpLIdEERS0_RKS_IT_E -// CHECK1-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[__C:%.*]]) #[[ATTR5]] comdat align 2 { +// CHECK1-SAME: (%"class.std::complex.2"* nonnull align 8 dereferenceable(16) [[THIS:%.*]], %"class.std::complex.2"* nonnull align 8 dereferenceable(16) [[__C:%.*]]) #[[ATTR5]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 -// CHECK1-NEXT: [[__C_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 -// CHECK1-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: store %"class.std::complex.0"* [[__C]], %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR12]] -// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.2"*, align 8 +// CHECK1-NEXT: [[__C_ADDR:%.*]] = alloca %"class.std::complex.2"*, align 8 +// CHECK1-NEXT: store %"class.std::complex.2"* [[THIS]], %"class.std::complex.2"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store %"class.std::complex.2"* [[__C]], %"class.std::complex.2"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.2"*, %"class.std::complex.2"** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %"class.std::complex.2"*, %"class.std::complex.2"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.2"* nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR11]] +// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.2", %"class.std::complex.2"* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA24:![0-9]+]] // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[CALL]] // CHECK1-NEXT: store double [[ADD]], double* [[__RE_]], align 8, !tbaa [[TBAA24]] -// CHECK1-NEXT: [[TMP2:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR12]] -// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load %"class.std::complex.2"*, %"class.std::complex.2"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.2"* nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR11]] +// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.2", %"class.std::complex.2"* [[THIS1]], i32 0, i32 1 // CHECK1-NEXT: [[TMP3:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA26:![0-9]+]] // CHECK1-NEXT: [[ADD3:%.*]] = fadd double [[TMP3]], [[CALL2]] // CHECK1-NEXT: store double [[ADD3]], double* [[__IM_]], align 8, !tbaa [[TBAA26]] -// CHECK1-NEXT: ret %"class.std::complex.0"* [[THIS1]] +// CHECK1-NEXT: ret %"class.std::complex.2"* [[THIS1]] // // // CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func5 -// CHECK1-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR7]] { +// CHECK1-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x i8*], align 8 -// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca %"class.std::complex.0", align 8 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca %"class.std::complex.2", align 8 // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2, !tbaa [[TBAA19]] // CHECK1-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] @@ -1020,13 +1029,13 @@ // CHECK1-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] // CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2, !tbaa [[TBAA19]] // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %"class.std::complex.0"** -// CHECK1-NEXT: [[TMP11:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %"class.std::complex.2"** +// CHECK1-NEXT: [[TMP11:%.*]] = load %"class.std::complex.2"*, %"class.std::complex.2"** [[TMP10]], align 8 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr %"class.std::complex.0", %"class.std::complex.0"* [[TMP11]], i64 1 -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast %"class.std::complex.0"* [[TMP13]] to i8* -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast %"class.std::complex.0"* [[TMP11]] to i64* -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast %"class.std::complex.0"* [[DOTOMP_REDUCTION_ELEMENT]] to i64* +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr %"class.std::complex.2", %"class.std::complex.2"* [[TMP11]], i64 1 +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast %"class.std::complex.2"* [[TMP13]] to i8* +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast %"class.std::complex.2"* [[TMP11]] to i64* +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast %"class.std::complex.2"* [[DOTOMP_REDUCTION_ELEMENT]] to i64* // CHECK1-NEXT: br label [[DOTSHUFFLE_PRE_COND:%.*]] // CHECK1: .shuffle.pre_cond: // CHECK1-NEXT: [[TMP17:%.*]] = phi i64* [ [[TMP15]], [[ENTRY:%.*]] ], [ [[TMP29:%.*]], [[DOTSHUFFLE_THEN:%.*]] ] @@ -1048,7 +1057,7 @@ // CHECK1-NEXT: [[TMP30]] = getelementptr i64, i64* [[TMP18]], i64 1 // CHECK1-NEXT: br label [[DOTSHUFFLE_PRE_COND]] // CHECK1: .shuffle.exit: -// CHECK1-NEXT: [[TMP31:%.*]] = bitcast %"class.std::complex.0"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* +// CHECK1-NEXT: [[TMP31:%.*]] = bitcast %"class.std::complex.2"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* // CHECK1-NEXT: store i8* [[TMP31]], i8** [[TMP12]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[TMP32:%.*]] = icmp eq i16 [[TMP8]], 0 // CHECK1-NEXT: [[TMP33:%.*]] = icmp eq i16 [[TMP8]], 1 @@ -1077,13 +1086,13 @@ // CHECK1-NEXT: br i1 [[TMP48]], label [[THEN4:%.*]], label [[ELSE5:%.*]] // CHECK1: then4: // CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP50:%.*]] = bitcast i8** [[TMP49]] to %"class.std::complex.0"** -// CHECK1-NEXT: [[TMP51:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[TMP50]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = bitcast i8** [[TMP49]] to %"class.std::complex.2"** +// CHECK1-NEXT: [[TMP51:%.*]] = load %"class.std::complex.2"*, %"class.std::complex.2"** [[TMP50]], align 8 // CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP53:%.*]] = bitcast i8** [[TMP52]] to %"class.std::complex.0"** -// CHECK1-NEXT: [[TMP54:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[TMP53]], align 8 -// CHECK1-NEXT: [[TMP55:%.*]] = bitcast %"class.std::complex.0"* [[TMP54]] to i8* -// CHECK1-NEXT: [[TMP56:%.*]] = bitcast %"class.std::complex.0"* [[TMP51]] to i8* +// CHECK1-NEXT: [[TMP53:%.*]] = bitcast i8** [[TMP52]] to %"class.std::complex.2"** +// CHECK1-NEXT: [[TMP54:%.*]] = load %"class.std::complex.2"*, %"class.std::complex.2"** [[TMP53]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = bitcast %"class.std::complex.2"* [[TMP54]] to i8* +// CHECK1-NEXT: [[TMP56:%.*]] = bitcast %"class.std::complex.2"* [[TMP51]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP55]], i8* align 8 [[TMP56]], i64 16, i1 false), !tbaa.struct !27 // CHECK1-NEXT: br label [[IFCONT6:%.*]] // CHECK1: else5: @@ -1093,7 +1102,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func6 -// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -1154,27 +1163,20 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA19]] // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 1 -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32** -// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 2 -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %"class.std::complex.0"** -// CHECK1-NEXT: [[TMP11:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[TMP10]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]], %"class.std::complex.0"* [[TMP11]]) #[[ATTR6]] +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(i8** [[GLOBAL_ARGS]]) +// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[GLOBAL_ARGS]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.anon.3** +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast %struct.anon.3** [[TMP3]] to %struct.anon.3* +// CHECK1-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], %struct.anon.3* [[TMP4]]) #[[ATTR6]] // CHECK1-NEXT: ret void // // @@ -1222,20 +1224,20 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIdEC2ERKdS2_ -// CHECK1-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]], double* nonnull align 8 dereferenceable(8) [[__RE:%.*]], double* nonnull align 8 dereferenceable(8) [[__IM:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { +// CHECK1-SAME: (%"class.std::complex.2"* nonnull align 8 dereferenceable(16) [[THIS:%.*]], double* nonnull align 8 dereferenceable(8) [[__RE:%.*]], double* nonnull align 8 dereferenceable(8) [[__IM:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.2"*, align 8 // CHECK1-NEXT: [[__RE_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[__IM_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store %"class.std::complex.2"* [[THIS]], %"class.std::complex.2"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: store double* [[__RE]], double** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: store double* [[__IM]], double** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.2"*, %"class.std::complex.2"** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.2", %"class.std::complex.2"* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[TMP1:%.*]] = load double, double* [[TMP0]], align 8, !tbaa [[TBAA22]] // CHECK1-NEXT: store double [[TMP1]], double* [[__RE_]], align 8, !tbaa [[TBAA24]] -// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 1 +// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.2", %"class.std::complex.2"* [[THIS1]], i32 0, i32 1 // CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[TMP3:%.*]] = load double, double* [[TMP2]], align 8, !tbaa [[TBAA22]] // CHECK1-NEXT: store double [[TMP3]], double* [[__IM_]], align 8, !tbaa [[TBAA26]] @@ -1243,23 +1245,23 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZNKSt7complexIdE4realEv -// CHECK1-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 { +// CHECK1-SAME: (%"class.std::complex.2"* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 -// CHECK1-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.2"*, align 8 +// CHECK1-NEXT: store %"class.std::complex.2"* [[THIS]], %"class.std::complex.2"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.2"*, %"class.std::complex.2"** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.2", %"class.std::complex.2"* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA24]] // CHECK1-NEXT: ret double [[TMP0]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZNKSt7complexIdE4imagEv -// CHECK1-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 { +// CHECK1-SAME: (%"class.std::complex.2"* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 -// CHECK1-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 1 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.2"*, align 8 +// CHECK1-NEXT: store %"class.std::complex.2"* [[THIS]], %"class.std::complex.2"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.2"*, %"class.std::complex.2"** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.2", %"class.std::complex.2"* [[THIS1]], i32 0, i32 1 // CHECK1-NEXT: [[TMP0:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA26]] // CHECK1-NEXT: ret double [[TMP0]] // diff --git a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp @@ -53,7 +53,7 @@ // CHECK1-SAME: (i64 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 @@ -63,13 +63,12 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i8* -// CHECK1-NEXT: store i8 [[TMP2]], i8* [[CONV1]], align 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[A_CASTED]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK1-NEXT: store i8 [[TMP3]], i8* [[TMP2]], align 1 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -77,16 +76,20 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* -// CHECK1-NEXT: store i8 49, i8* [[CONV]], align 1 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK1-NEXT: store i8 [[TMP2]], i8* [[A]], align 1 +// CHECK1-NEXT: store i8 49, i8* [[A]], align 1 // CHECK1-NEXT: ret void // // @@ -94,7 +97,7 @@ // CHECK1-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 @@ -104,13 +107,12 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK1-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 2 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -118,16 +120,20 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: store i16 1, i16* [[CONV]], align 2 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK1-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 +// CHECK1-NEXT: store i16 1, i16* [[AA]], align 2 // CHECK1-NEXT: ret void // // @@ -135,7 +141,7 @@ // CHECK1-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 2 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 @@ -145,13 +151,12 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK1-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 2 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -159,58 +164,78 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_2]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP1:%.*]] = bitcast i16* [[CONV]] to i8* -// CHECK1-NEXT: store i8* [[TMP1]], i8** [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP4]], i64 1) +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK1-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i16* [[AA]], i16** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast %struct.anon.2* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK1-NEXT: [[TMP5:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP4]], i8* [[TMP5]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to %struct.anon.2* +// CHECK1-NEXT: store [[STRUCT_ANON_2]] [[TMP7]], %struct.anon.2* [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.2*)* @__omp_outlined__3 to i8*), i8* null, i8* [[TMP6]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP5]], i64 8) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_3]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i16* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP2]], i8** [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* @__omp_outlined__4 to i8*), i8* null, i8** [[TMP5]], i64 1) +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16*, i16** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i16* [[TMP2]], i16** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast %struct.anon.3* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK1-NEXT: [[TMP5:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP4]], i8* [[TMP5]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to %struct.anon.3* +// CHECK1-NEXT: store [[STRUCT_ANON_3]] [[TMP7]], %struct.anon.3* [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.3*)* @__omp_outlined__4 to i8*), i8* null, i8* [[TMP6]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP5]], i64 8) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 -// CHECK1-NEXT: store i16 1, i16* [[TMP0]], align 2 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16*, i16** [[TMP1]], align 8 +// CHECK1-NEXT: store i16 1, i16* [[TMP2]], align 2 // CHECK1-NEXT: ret void // // @@ -218,7 +243,7 @@ // CHECK2-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 @@ -228,13 +253,12 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK2-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i32* [[A_CASTED]] to i8* -// CHECK2-NEXT: store i8 [[TMP2]], i8* [[CONV1]], align 1 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_CASTED]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK2-NEXT: store i8 [[TMP3]], i8* [[TMP2]], align 1 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]] +// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -242,16 +266,20 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK2-NEXT: [[A:%.*]] = alloca i8, align 1 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[A_ADDR]] to i8* -// CHECK2-NEXT: store i8 49, i8* [[CONV]], align 1 +// CHECK2-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK2-NEXT: store i8 [[TMP2]], i8* [[A]], align 1 +// CHECK2-NEXT: store i8 49, i8* [[A]], align 1 // CHECK2-NEXT: ret void // // @@ -259,7 +287,7 @@ // CHECK2-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 @@ -269,13 +297,12 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK2-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK2-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 2 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -283,16 +310,20 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK2-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK2-NEXT: store i16 1, i16* [[CONV]], align 2 +// CHECK2-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK2-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 +// CHECK2-NEXT: store i16 1, i16* [[AA]], align 2 // CHECK2-NEXT: ret void // // @@ -300,7 +331,7 @@ // CHECK2-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 2 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 @@ -310,13 +341,12 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) -// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK2-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK2-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 2 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -324,57 +354,77 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK2-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_2]], align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP1:%.*]] = bitcast i16* [[CONV]] to i8* -// CHECK2-NEXT: store i8* [[TMP1]], i8** [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP4]], i32 1) +// CHECK2-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK2-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store i16* [[AA]], i16** [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast %struct.anon.2* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK2-NEXT: [[TMP5:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP4]], i8* [[TMP5]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to %struct.anon.2* +// CHECK2-NEXT: store [[STRUCT_ANON_2]] [[TMP7]], %struct.anon.2* [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.2*)* @__omp_outlined__3 to i8*), i8* null, i8* [[TMP6]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[TMP5]], i32 4) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_3]], align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i16* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP2]], i8** [[TMP1]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* @__omp_outlined__4 to i8*), i8* null, i8** [[TMP5]], i32 1) +// CHECK2-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16*, i16** [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store i16* [[TMP2]], i16** [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast %struct.anon.3* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK2-NEXT: [[TMP5:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP4]], i8* [[TMP5]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to %struct.anon.3* +// CHECK2-NEXT: store [[STRUCT_ANON_3]] [[TMP7]], %struct.anon.3* [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.3*)* @__omp_outlined__4 to i8*), i8* null, i8* [[TMP6]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[TMP5]], i32 4) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i16*, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 -// CHECK2-NEXT: store i16 1, i16* [[TMP0]], align 2 +// CHECK2-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16*, i16** [[TMP1]], align 4 +// CHECK2-NEXT: store i16 1, i16* [[TMP2]], align 2 // CHECK2-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp @@ -35,6 +35,7 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16 // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) @@ -44,7 +45,7 @@ // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -52,86 +53,97 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: [[I:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[I_ON_STACK:%.*]] = bitcast i8* [[I]] to i32* // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I_ON_STACK]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i32* [[I_ON_STACK]] to i8* -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP10]], i64 1) +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[I_ON_STACK]], i32** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK1-NEXT: [[TMP11:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[TMP12:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP10]], i8* [[TMP11]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP12]] to %struct.anon.0* +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP13]], %struct.anon.0* [[TMP14]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8* [[TMP12]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP11]], i64 8) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[I]], i64 4) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -141,22 +153,22 @@ // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 -// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR5]] +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(i8** [[GLOBAL_ARGS]]) +// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[GLOBAL_ARGS]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.anon.0** +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast %struct.anon.0** [[TMP3]] to %struct.anon.0* +// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], %struct.anon.0* [[TMP4]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16 // CHECK2-SAME: () #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: entry: +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) @@ -166,7 +178,7 @@ // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]] +// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -174,86 +186,97 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK2-NEXT: [[I:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) // CHECK2-NEXT: [[I_ON_STACK:%.*]] = bitcast i8* [[I]] to i32* // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I_ON_STACK]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP9:%.*]] = bitcast i32* [[I_ON_STACK]] to i8* -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP10]], i32 1) +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store i32* [[I_ON_STACK]], i32** [[TMP9]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK2-NEXT: [[TMP11:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[TMP12:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP10]], i8* [[TMP11]]) +// CHECK2-NEXT: [[TMP13:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP12]] to %struct.anon.0* +// CHECK2-NEXT: store [[STRUCT_ANON_0]] [[TMP13]], %struct.anon.0* [[TMP14]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8* [[TMP12]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[TMP11]], i32 4) // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[I]], i32 4) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK2-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 +// CHECK2-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK2-NEXT: store i32 [[INC]], i32* [[TMP2]], align 4 // CHECK2-NEXT: ret void // // @@ -263,15 +286,14 @@ // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 +// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8*, align 4 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** -// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR5]] +// CHECK2-NEXT: call void @__kmpc_get_shared_variables_aggregate(i8** [[GLOBAL_ARGS]]) +// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[GLOBAL_ARGS]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.anon.0** +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast %struct.anon.0** [[TMP3]] to %struct.anon.0* +// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], %struct.anon.0* [[TMP4]]) #[[ATTR4]] // CHECK2-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp @@ -77,8 +77,7 @@ // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 // CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 @@ -92,17 +91,17 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4:[0-9]+]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[L_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[L_CASTED]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [1000 x i32]* [[TMP0]], [1000 x i32]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -110,287 +109,299 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[L:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK1-NEXT: [[L2:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4) -// CHECK1-NEXT: [[L_ON_STACK:%.*]] = bitcast i8* [[L2]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[L]], align 4 +// CHECK1-NEXT: [[L1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4) +// CHECK1-NEXT: [[L_ON_STACK:%.*]] = bitcast i8* [[L1]] to i32* +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP18]], [[ADD]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[L_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[L_CASTED]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP15]] to i8* -// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP17]] to i8* -// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP19]] to i8* -// CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP29:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK1-NEXT: [[TMP31:%.*]] = inttoptr i64 [[TMP21]] to i8* -// CHECK1-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP33]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i64)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP34]], i64 5) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP25]], i64* [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP27]], i64* [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store [1000 x i32]* [[TMP4]], [1000 x i32]** [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[L]], align 4 +// CHECK1-NEXT: store i32 [[TMP32]], i32* [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK1-NEXT: [[TMP34:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 40) +// CHECK1-NEXT: [[TMP35:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP33]], i8* [[TMP34]]) +// CHECK1-NEXT: [[TMP36:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK1-NEXT: [[TMP37:%.*]] = bitcast i8* [[TMP35]] to %struct.anon.0* +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP36]], %struct.anon.0* [[TMP37]], align 1 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP39]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__1 to i8*), i8* null, i8* [[TMP35]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP34]], i64 40) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] -// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] -// CHECK1-NEXT: br i1 [[CMP13]], label [[COND_TRUE14:%.*]], label [[COND_FALSE15:%.*]] -// CHECK1: cond.true14: -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: br label [[COND_END16:%.*]] -// CHECK1: cond.false15: +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END16]] -// CHECK1: cond.end16: -// CHECK1-NEXT: [[COND17:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE14]] ], [ [[TMP44]], [[COND_FALSE15]] ] -// CHECK1-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP45]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] +// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP46]], [[TMP47]] +// CHECK1-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] +// CHECK1: cond.true11: +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: br label [[COND_END13:%.*]] +// CHECK1: cond.false12: +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END13]] +// CHECK1: cond.end13: +// CHECK1-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP48]], [[COND_TRUE11]] ], [ [[TMP49]], [[COND_FALSE12]] ] +// CHECK1-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP50]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP47]]) -// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 -// CHECK1-NEXT: br i1 [[TMP49]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP51:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[TMP51]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP52]]) +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP54:%.*]] = icmp ne i32 [[TMP53]], 0 +// CHECK1-NEXT: br i1 [[TMP54]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: store i32 [[TMP50]], i32* [[CONV1]], align 4 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, i32* [[L]], align 4 +// CHECK1-NEXT: store i32 [[TMP55]], i32* [[L]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: -// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[L2]], i64 4) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[L1]], i64 4) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[L:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[L]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 32) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP18]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 32) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP10]] to i32 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP9]], [[CONV7]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP20]] to i32 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP19]], [[CONV5]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP11]] to i32 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP21]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV7]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP8]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: store i32 [[TMP30]], i32* [[L]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP27]]) -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK1-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP36]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP37]]) +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK1-NEXT: br i1 [[TMP39]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: store i32 [[TMP30]], i32* [[CONV1]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[L]], align 4 +// CHECK1-NEXT: store i32 [[TMP40]], i32* [[L]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -403,7 +414,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 @@ -415,13 +426,14 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store [1000 x i16]* [[TMP0]], [1000 x i16]** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -429,12 +441,12 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], [1000 x i16]* noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -444,133 +456,141 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_3]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load [1000 x i16]*, [1000 x i16]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP16]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP15]] to i8* -// CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP17]] to i8* -// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to i8* -// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP27:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP29]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP30]], i64 4) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i64 [[TMP19]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP23]], i64* [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP25]], i64* [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store [1000 x i16]* [[TMP4]], [1000 x i16]** [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = bitcast %struct.anon.3* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK1-NEXT: [[TMP30:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 32) +// CHECK1-NEXT: [[TMP31:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP29]], i8* [[TMP30]]) +// CHECK1-NEXT: [[TMP32:%.*]] = load [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast i8* [[TMP31]] to %struct.anon.3* +// CHECK1-NEXT: store [[STRUCT_ANON_3]] [[TMP32]], %struct.anon.3* [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP35]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.3*)* @__omp_outlined__3 to i8*), i8* null, i8* [[TMP31]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP30]], i64 32) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]] -// CHECK1-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] -// CHECK1: cond.true11: -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: br label [[COND_END13:%.*]] -// CHECK1: cond.false12: +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END13]] -// CHECK1: cond.end13: -// CHECK1-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE11]] ], [ [[TMP40]], [[COND_FALSE12]] ] -// CHECK1-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP41]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP42]], [[TMP43]] +// CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK1: cond.true10: +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: br label [[COND_END12:%.*]] +// CHECK1: cond.false11: +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END12]] +// CHECK1: cond.end12: +// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP44]], [[COND_TRUE10]] ], [ [[TMP45]], [[COND_FALSE11]] ] +// CHECK1-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP46]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP43]]) +// CHECK1-NEXT: [[TMP47:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[TMP47]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP48]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], [1000 x i16]* noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -580,78 +600,85 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load [1000 x i16]*, [1000 x i16]** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP16]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CONV6:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp ule i64 [[CONV6]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CONV5:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP19]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 -// CHECK1-NEXT: [[CONV8:%.*]] = sext i16 [[TMP14]] to i32 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], 1 -// CHECK1-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i16 -// CHECK1-NEXT: store i16 [[CONV10]], i16* [[ARRAYIDX]], align 2 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP8]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 +// CHECK1-NEXT: [[CONV7:%.*]] = sext i16 [[TMP22]] to i32 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[CONV7]], 1 +// CHECK1-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD8]] to i16 +// CHECK1-NEXT: store i16 [[CONV9]], i16* [[ARRAYIDX]], align 2 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -661,6 +688,7 @@ // CHECK1-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 @@ -670,9 +698,11 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -680,108 +710,120 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_5]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 10 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to i8* -// CHECK1-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to i8* -// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP17]], i64 3) +// CHECK1-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i64 [[TMP12]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP14]], i64* [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP16]], i64* [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast %struct.anon.5* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK1-NEXT: [[TMP19:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 24) +// CHECK1-NEXT: [[TMP20:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP18]], i8* [[TMP19]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP20]] to %struct.anon.5* +// CHECK1-NEXT: store [[STRUCT_ANON_5]] [[TMP21]], %struct.anon.5* [[TMP22]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.5*)* @__omp_outlined__5 to i8*), i8* null, i8* [[TMP20]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP19]], i64 24) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP29]], 9 // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK1: cond.true5: // CHECK1-NEXT: br label [[COND_END7:%.*]] // CHECK1: cond.false6: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END7]] // CHECK1: cond.end7: -// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] +// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP30]], [[COND_FALSE6]] ] // CHECK1-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -791,56 +833,62 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP7]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -849,7 +897,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 // CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 @@ -861,13 +909,14 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[F_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[F_CASTED]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store [10 x [10 x i32]]* [[TMP0]], [10 x [10 x i32]]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i64 [[TMP4]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -875,12 +924,12 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__6 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 +// CHECK1-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -888,110 +937,118 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_8]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* +// CHECK1-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[F]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP10]], 100 // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[F_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[CONV3]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[F_CASTED]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to i8* -// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to i8* -// CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP18:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP12]] to i8* -// CHECK1-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x [10 x i32]]*, i64)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP21]], i64 4) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i64 [[TMP12]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 +// CHECK1-NEXT: store i64 [[TMP14]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP16]], i64* [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP18]], i64* [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [10 x [10 x i32]]* [[TMP2]], [10 x [10 x i32]]** [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[F]], align 4 +// CHECK1-NEXT: store i32 [[TMP21]], i32* [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = bitcast %struct.anon.8* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK1-NEXT: [[TMP23:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 32) +// CHECK1-NEXT: [[TMP24:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP22]], i8* [[TMP23]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i8* [[TMP24]] to %struct.anon.8* +// CHECK1-NEXT: store [[STRUCT_ANON_8]] [[TMP25]], %struct.anon.8* [[TMP26]], align 1 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.8*)* @__omp_outlined__7 to i8*), i8* null, i8* [[TMP24]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP23]], i64 32) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP28]], 99 -// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] -// CHECK1: cond.true7: -// CHECK1-NEXT: br label [[COND_END9:%.*]] -// CHECK1: cond.false8: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END9]] -// CHECK1: cond.end9: -// CHECK1-NEXT: [[COND10:%.*]] = phi i32 [ 99, [[COND_TRUE7]] ], [ [[TMP29]], [[COND_FALSE8]] ] -// CHECK1-NEXT: store i32 [[COND10]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP30]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP33]], 99 +// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK1: cond.true6: +// CHECK1-NEXT: br label [[COND_END8:%.*]] +// CHECK1: cond.false7: +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END8]] +// CHECK1: cond.end8: +// CHECK1-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP34]], [[COND_FALSE7]] ] +// CHECK1-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP35]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__7 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1004,76 +1061,83 @@ // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* +// CHECK1-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[F]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CONV4:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV4]], [[TMP7]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CONV3:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV3]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 10 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV5:%.*]] = sdiv i32 [[TMP10]], 10 -// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[DIV5]], 10 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL6]] -// CHECK1-NEXT: [[MUL7:%.*]] = mul nsw i32 [[SUB]], 1 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL7]] -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[J]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP18]], 10 +// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL5]] +// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[J]], align 4 // CHECK1-NEXT: store i32 10, i32* [[K]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP11]], [[MUL9]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD10]], [[TMP14]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 -// CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM12]] -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[ARRAYIDX13]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[J]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[F]], align 4 +// CHECK1-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[MUL8]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[K]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP22]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[J]], align 4 +// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] +// CHECK1-NEXT: store i32 [[ADD10]], i32* [[ARRAYIDX12]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]]) // CHECK1-NEXT: ret void // // @@ -1082,7 +1146,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 @@ -1094,13 +1158,14 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store [10 x [10 x i32]]* [[TMP0]], [10 x [10 x i32]]** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -1108,12 +1173,12 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__8 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1126,146 +1191,154 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I10:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[J11:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J10:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_11]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[CONV4:%.*]] = sext i32 [[DIV]] to i64 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[SUB5:%.*]] = sub nsw i32 [[TMP4]], 0 -// CHECK1-NEXT: [[DIV6:%.*]] = sdiv i32 [[SUB5]], 1 -// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[DIV6]] to i64 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV4]], [[CONV7]] -// CHECK1-NEXT: [[SUB8:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK1-NEXT: store i64 [[SUB8]], i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP8]], 0 +// CHECK1-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK1-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK1-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 // CHECK1-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: land.lhs.true: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp slt i32 0, [[TMP6]] -// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP10]] +// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i64 0, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK1-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_COMB_UB]], align 8 // CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK1-NEXT: [[CONV12:%.*]] = zext i32 [[NVPTX_NUM_THREADS]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_8(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 [[CONV12]]) -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[CONV11:%.*]] = zext i32 [[NVPTX_NUM_THREADS]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_8(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], i32 91, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 [[CONV11]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP14]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP18]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP16]], 1 -// CHECK1-NEXT: [[CMP14:%.*]] = icmp slt i64 [[TMP15]], [[ADD]] -// CHECK1-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP20]], 1 +// CHECK1-NEXT: [[CMP13:%.*]] = icmp slt i64 [[TMP19]], [[ADD]] +// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV15:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[CONV15]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP17]] to i8* -// CHECK1-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP18]] to i8* -// CHECK1-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP20]] to i8* -// CHECK1-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP28:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [10 x [10 x i32]]*)* @__omp_outlined__9 to i8*), i8* null, i8** [[TMP31]], i64 4) +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP22]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP24]], i64* [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP26]], i64* [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP28]], i32* [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store [10 x [10 x i32]]* [[TMP4]], [10 x [10 x i32]]** [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = bitcast %struct.anon.11* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK1-NEXT: [[TMP31:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 32) +// CHECK1-NEXT: [[TMP32:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP30]], i8* [[TMP31]]) +// CHECK1-NEXT: [[TMP33:%.*]] = load [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast i8* [[TMP32]] to %struct.anon.11* +// CHECK1-NEXT: store [[STRUCT_ANON_11]] [[TMP33]], %struct.anon.11* [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP36]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.11*)* @__omp_outlined__9 to i8*), i8* null, i8* [[TMP32]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP31]], i64 32) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP32]], [[TMP33]] -// CHECK1-NEXT: store i64 [[ADD16]], i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP34]], [[TMP35]] -// CHECK1-NEXT: store i64 [[ADD17]], i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i64 [[TMP36]], [[TMP37]] -// CHECK1-NEXT: store i64 [[ADD18]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK1-NEXT: [[CMP19:%.*]] = icmp sgt i64 [[TMP38]], [[TMP39]] -// CHECK1-NEXT: br i1 [[CMP19]], label [[COND_TRUE20:%.*]], label [[COND_FALSE21:%.*]] -// CHECK1: cond.true20: -// CHECK1-NEXT: [[TMP40:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK1-NEXT: br label [[COND_END22:%.*]] -// CHECK1: cond.false21: +// CHECK1-NEXT: [[TMP37:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_COMB_LB]], align 8 // CHECK1-NEXT: [[TMP41:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: br label [[COND_END22]] -// CHECK1: cond.end22: -// CHECK1-NEXT: [[COND23:%.*]] = phi i64 [ [[TMP40]], [[COND_TRUE20]] ], [ [[TMP41]], [[COND_FALSE21]] ] -// CHECK1-NEXT: store i64 [[COND23]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP42]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP41]], [[TMP42]] +// CHECK1-NEXT: store i64 [[ADD16]], i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: [[CMP17:%.*]] = icmp sgt i64 [[TMP43]], [[TMP44]] +// CHECK1-NEXT: br i1 [[CMP17]], label [[COND_TRUE18:%.*]], label [[COND_FALSE19:%.*]] +// CHECK1: cond.true18: +// CHECK1-NEXT: [[TMP45:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: br label [[COND_END20:%.*]] +// CHECK1: cond.false19: +// CHECK1-NEXT: [[TMP46:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: br label [[COND_END20]] +// CHECK1: cond.end20: +// CHECK1-NEXT: [[COND21:%.*]] = phi i64 [ [[TMP45]], [[COND_TRUE18]] ], [ [[TMP46]], [[COND_FALSE19]] ] +// CHECK1-NEXT: store i64 [[COND21]], i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP47]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP44]]) +// CHECK1-NEXT: [[TMP48:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[TMP48]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP49]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__9 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1278,117 +1351,124 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I10:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[J11:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], %struct.anon.11* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[CONV4:%.*]] = sext i32 [[DIV]] to i64 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[SUB5:%.*]] = sub nsw i32 [[TMP4]], 0 -// CHECK1-NEXT: [[DIV6:%.*]] = sdiv i32 [[SUB5]], 1 -// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[DIV6]] to i64 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV4]], [[CONV7]] -// CHECK1-NEXT: [[SUB8:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK1-NEXT: store i64 [[SUB8]], i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP12]], 0 +// CHECK1-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK1-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK1-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 // CHECK1-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: land.lhs.true: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp slt i32 0, [[TMP6]] -// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP14]] +// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK1-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[TMP8]], i64* [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP16]], i64* [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB3]], i32 [[TMP11]], i32 33, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP12]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB3]], i32 [[TMP19]], i32 33, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP20]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP12:%.*]] = icmp ule i64 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CMP11:%.*]] = icmp ule i64 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP16]], 0 -// CHECK1-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 -// CHECK1-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] -// CHECK1-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 -// CHECK1-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP15]], [[CONV16]] -// CHECK1-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] -// CHECK1-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK1-NEXT: store i32 [[CONV19]], i32* [[I10]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP19]], 0 -// CHECK1-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 -// CHECK1-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] -// CHECK1-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 -// CHECK1-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP18]], [[CONV23]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP20]], 0 -// CHECK1-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 -// CHECK1-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] -// CHECK1-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 -// CHECK1-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] -// CHECK1-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP17]], [[MUL29]] -// CHECK1-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 -// CHECK1-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] -// CHECK1-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 -// CHECK1-NEXT: store i32 [[CONV33]], i32* [[J11]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I10]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[J11]], align 4 -// CHECK1-NEXT: [[ADD34:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[I10]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[J11]], align 4 -// CHECK1-NEXT: [[IDXPROM35:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM35]] -// CHECK1-NEXT: store i32 [[ADD34]], i32* [[ARRAYIDX36]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK1-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 +// CHECK1-NEXT: [[MUL14:%.*]] = mul nsw i32 1, [[DIV13]] +// CHECK1-NEXT: [[CONV15:%.*]] = sext i32 [[MUL14]] to i64 +// CHECK1-NEXT: [[DIV16:%.*]] = sdiv i64 [[TMP23]], [[CONV15]] +// CHECK1-NEXT: [[MUL17:%.*]] = mul nsw i64 [[DIV16]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL17]] +// CHECK1-NEXT: [[CONV18:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK1-NEXT: store i32 [[CONV18]], i32* [[I9]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK1-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 +// CHECK1-NEXT: [[MUL21:%.*]] = mul nsw i32 1, [[DIV20]] +// CHECK1-NEXT: [[CONV22:%.*]] = sext i32 [[MUL21]] to i64 +// CHECK1-NEXT: [[DIV23:%.*]] = sdiv i64 [[TMP26]], [[CONV22]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK1-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 +// CHECK1-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] +// CHECK1-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 +// CHECK1-NEXT: [[MUL28:%.*]] = mul nsw i64 [[DIV23]], [[CONV27]] +// CHECK1-NEXT: [[SUB29:%.*]] = sub nsw i64 [[TMP25]], [[MUL28]] +// CHECK1-NEXT: [[MUL30:%.*]] = mul nsw i64 [[SUB29]], 1 +// CHECK1-NEXT: [[ADD31:%.*]] = add nsw i64 0, [[MUL30]] +// CHECK1-NEXT: [[CONV32:%.*]] = trunc i64 [[ADD31]] to i32 +// CHECK1-NEXT: store i32 [[CONV32]], i32* [[J10]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[I9]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[J10]], align 4 +// CHECK1-NEXT: [[ADD33:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[I9]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP8]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[J10]], align 4 +// CHECK1-NEXT: [[IDXPROM34:%.*]] = sext i32 [[TMP32]] to i64 +// CHECK1-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM34]] +// CHECK1-NEXT: store i32 [[ADD33]], i32* [[ARRAYIDX35]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK1-NEXT: [[ADD37:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] -// CHECK1-NEXT: store i64 [[ADD37]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD36:%.*]] = add nsw i64 [[TMP33]], [[TMP34]] +// CHECK1-NEXT: store i64 [[ADD36]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP28]]) +// CHECK1-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP36]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -1400,7 +1480,7 @@ // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 // CHECK1-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 @@ -1413,14 +1493,17 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[V_ADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store [1000 x i32]* [[TMP0]], [1000 x i32]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[V_ADDR]], align 8 +// CHECK1-NEXT: store i32* [[TMP7]], i32** [[TMP6]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i32* [[TMP5]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -1428,13 +1511,12 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__10 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[V:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK1-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1444,139 +1526,145 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_13]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32*, i32** [[V_ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP15]] to i8* -// CHECK1-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP17]] to i8* -// CHECK1-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP19]] to i8* -// CHECK1-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP28:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK1-NEXT: [[TMP30:%.*]] = bitcast i32* [[TMP20]] to i8* -// CHECK1-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i32*)* @__omp_outlined__11 to i8*), i8* null, i8** [[TMP33]], i64 5) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK1-NEXT: store i64 [[TMP20]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK1-NEXT: store i64 [[TMP22]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP24]], i64* [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP26]], i64* [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP28]], i32* [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store [1000 x i32]* [[TMP4]], [1000 x i32]** [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: store i32* [[TMP31]], i32** [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = bitcast %struct.anon.13* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK1-NEXT: [[TMP33:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 40) +// CHECK1-NEXT: [[TMP34:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP32]], i8* [[TMP33]]) +// CHECK1-NEXT: [[TMP35:%.*]] = load [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = bitcast i8* [[TMP34]] to %struct.anon.13* +// CHECK1-NEXT: store [[STRUCT_ANON_13]] [[TMP35]], %struct.anon.13* [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP38]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.13*)* @__omp_outlined__11 to i8*), i8* null, i8* [[TMP34]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP33]], i64 40) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] -// CHECK1-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] -// CHECK1: cond.true11: -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: br label [[COND_END13:%.*]] -// CHECK1: cond.false12: +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END13]] -// CHECK1: cond.end13: -// CHECK1-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE11]] ], [ [[TMP43]], [[COND_FALSE12]] ] -// CHECK1-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP44]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP45]], [[TMP46]] +// CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK1: cond.true10: +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: br label [[COND_END12:%.*]] +// CHECK1: cond.false11: +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END12]] +// CHECK1: cond.end12: +// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP47]], [[COND_TRUE10]] ], [ [[TMP48]], [[COND_FALSE11]] ] +// CHECK1-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP49]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP46]]) +// CHECK1-NEXT: [[TMP50:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, i32* [[TMP50]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP51]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__11 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[V:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK1-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1586,80 +1674,87 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP17]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CONV6:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp ule i64 [[CONV6]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CONV5:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP20]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[V_ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM8]] -// CHECK1-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX9]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP22]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP8]], i64 0, i64 [[IDXPROM7]] +// CHECK1-NEXT: store i32 [[TMP24]], i32* [[ARRAYIDX8]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]]) +// CHECK1-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -1671,8 +1766,7 @@ // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 // CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 @@ -1686,17 +1780,17 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4:[0-9]+]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[L_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[L_CASTED]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store [1000 x i32]* [[TMP0]], [1000 x i32]** [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] +// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -1704,287 +1798,299 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[L:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK2-NEXT: [[L2:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4) -// CHECK2-NEXT: [[L_ON_STACK:%.*]] = bitcast i8* [[L2]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP4:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK2-NEXT: store i32 [[TMP6]], i32* [[L]], align 4 +// CHECK2-NEXT: [[L1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4) +// CHECK2-NEXT: [[L_ON_STACK:%.*]] = bitcast i8* [[L1]] to i32* +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK2-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] -// CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK2-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] -// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP18]], [[ADD]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK2-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK2-NEXT: [[CONV9:%.*]] = bitcast i64* [[L_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[L_CASTED]], align 8 -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP15]] to i8* -// CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 -// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP17]] to i8* -// CHECK2-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 -// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP19]] to i8* -// CHECK2-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 -// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK2-NEXT: [[TMP29:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 8 -// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK2-NEXT: [[TMP31:%.*]] = inttoptr i64 [[TMP21]] to i8* -// CHECK2-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 8 -// CHECK2-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 -// CHECK2-NEXT: [[TMP34:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP33]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i64)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP34]], i64 5) +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK2-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK2-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP25]], i64* [[TMP24]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: store i64 [[TMP27]], i64* [[TMP26]], align 8 +// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 8 +// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK2-NEXT: store [1000 x i32]* [[TMP4]], [1000 x i32]** [[TMP30]], align 8 +// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[L]], align 4 +// CHECK2-NEXT: store i32 [[TMP32]], i32* [[TMP31]], align 8 +// CHECK2-NEXT: [[TMP33:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK2-NEXT: [[TMP34:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 40) +// CHECK2-NEXT: [[TMP35:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP33]], i8* [[TMP34]]) +// CHECK2-NEXT: [[TMP36:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK2-NEXT: [[TMP37:%.*]] = bitcast i8* [[TMP35]] to %struct.anon.0* +// CHECK2-NEXT: store [[STRUCT_ANON_0]] [[TMP36]], %struct.anon.0* [[TMP37]], align 1 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP39]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__1 to i8*), i8* null, i8* [[TMP35]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[TMP34]], i64 40) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] -// CHECK2-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] -// CHECK2-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] -// CHECK2-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] -// CHECK2-NEXT: br i1 [[CMP13]], label [[COND_TRUE14:%.*]], label [[COND_FALSE15:%.*]] -// CHECK2: cond.true14: -// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: br label [[COND_END16:%.*]] -// CHECK2: cond.false15: +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: br label [[COND_END16]] -// CHECK2: cond.end16: -// CHECK2-NEXT: [[COND17:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE14]] ], [ [[TMP44]], [[COND_FALSE15]] ] -// CHECK2-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP45]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] +// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP46]], [[TMP47]] +// CHECK2-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] +// CHECK2: cond.true11: +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: br label [[COND_END13:%.*]] +// CHECK2: cond.false12: +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END13]] +// CHECK2: cond.end13: +// CHECK2-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP48]], [[COND_TRUE11]] ], [ [[TMP49]], [[COND_FALSE12]] ] +// CHECK2-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP50]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP47]]) -// CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 -// CHECK2-NEXT: br i1 [[TMP49]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK2-NEXT: [[TMP51:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP52:%.*]] = load i32, i32* [[TMP51]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP52]]) +// CHECK2-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP54:%.*]] = icmp ne i32 [[TMP53]], 0 +// CHECK2-NEXT: br i1 [[TMP54]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK2: .omp.lastprivate.then: -// CHECK2-NEXT: [[TMP50:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK2-NEXT: store i32 [[TMP50]], i32* [[CONV1]], align 4 +// CHECK2-NEXT: [[TMP55:%.*]] = load i32, i32* [[L]], align 4 +// CHECK2-NEXT: store i32 [[TMP55]], i32* [[L]], align 4 // CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK2: .omp.lastprivate.done: // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: -// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[L2]], i64 4) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[L1]], i64 4) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[L:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK2-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK2-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK2-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP7]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 8 +// CHECK2-NEXT: store i32 [[TMP10]], i32* [[L]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK2-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK2-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK2-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 +// CHECK2-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 32) +// CHECK2-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP18]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 32) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP10]] to i32 -// CHECK2-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP9]], [[CONV7]] -// CHECK2-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP20]] to i32 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP19]], [[CONV5]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP11]] to i32 +// CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP21]] to i32 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[CONV7]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK2-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK2-NEXT: br i1 [[CMP8]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK2-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK2-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP8]], i64 0, i64 [[IDXPROM]] // CHECK2-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK2-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK2-NEXT: store i32 [[TMP30]], i32* [[L]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK2-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK2-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK2-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK2-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK2-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: -// CHECK2-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP27]]) -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK2-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK2-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP36]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP37]]) +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK2-NEXT: br i1 [[TMP39]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK2: .omp.lastprivate.then: -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK2-NEXT: store i32 [[TMP30]], i32* [[CONV1]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[L]], align 4 +// CHECK2-NEXT: store i32 [[TMP40]], i32* [[L]], align 4 // CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK2: .omp.lastprivate.done: // CHECK2-NEXT: br label [[OMP_PRECOND_END]] @@ -1997,7 +2103,7 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 @@ -2009,13 +2115,14 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store [1000 x i16]* [[TMP0]], [1000 x i16]** [[TMP5]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR3]] +// CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -2023,12 +2130,12 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], [1000 x i16]* noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2038,133 +2145,141 @@ // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_3]], align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK2-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load [1000 x i16]*, [1000 x i16]** [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP16]], [[ADD]] // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP15]] to i8* -// CHECK2-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8 -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP17]] to i8* -// CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 -// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to i8* -// CHECK2-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 -// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK2-NEXT: [[TMP27:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 -// CHECK2-NEXT: [[TMP30:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP29]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP30]], i64 4) +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK2-NEXT: store i64 [[TMP19]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK2-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP23]], i64* [[TMP22]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: store i64 [[TMP25]], i64* [[TMP24]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 8 +// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store [1000 x i16]* [[TMP4]], [1000 x i16]** [[TMP28]], align 8 +// CHECK2-NEXT: [[TMP29:%.*]] = bitcast %struct.anon.3* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK2-NEXT: [[TMP30:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 32) +// CHECK2-NEXT: [[TMP31:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP29]], i8* [[TMP30]]) +// CHECK2-NEXT: [[TMP32:%.*]] = load [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK2-NEXT: [[TMP33:%.*]] = bitcast i8* [[TMP31]] to %struct.anon.3* +// CHECK2-NEXT: store [[STRUCT_ANON_3]] [[TMP32]], %struct.anon.3* [[TMP33]], align 8 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP35]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.3*)* @__omp_outlined__3 to i8*), i8* null, i8* [[TMP31]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[TMP30]], i64 32) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] -// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]] -// CHECK2-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] -// CHECK2: cond.true11: -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: br label [[COND_END13:%.*]] -// CHECK2: cond.false12: +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: br label [[COND_END13]] -// CHECK2: cond.end13: -// CHECK2-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE11]] ], [ [[TMP40]], [[COND_FALSE12]] ] -// CHECK2-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP41]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP42]], [[TMP43]] +// CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK2: cond.true10: +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: br label [[COND_END12:%.*]] +// CHECK2: cond.false11: +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END12]] +// CHECK2: cond.end12: +// CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP44]], [[COND_TRUE10]] ], [ [[TMP45]], [[COND_FALSE11]] ] +// CHECK2-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP46]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP43]]) +// CHECK2-NEXT: [[TMP47:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[TMP47]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP48]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], [1000 x i16]* noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2174,78 +2289,85 @@ // CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK2-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK2-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK2-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK2-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load [1000 x i16]*, [1000 x i16]** [[TMP7]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK2-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK2-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP16]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[CONV6:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP7:%.*]] = icmp ule i64 [[CONV6]], [[TMP11]] -// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CONV5:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP19]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 -// CHECK2-NEXT: [[CONV8:%.*]] = sext i16 [[TMP14]] to i32 -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], 1 -// CHECK2-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i16 -// CHECK2-NEXT: store i16 [[CONV10]], i16* [[ARRAYIDX]], align 2 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP8]], i64 0, i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP22:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 +// CHECK2-NEXT: [[CONV7:%.*]] = sext i16 [[TMP22]] to i32 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[CONV7]], 1 +// CHECK2-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD8]] to i16 +// CHECK2-NEXT: store i16 [[CONV9]], i16* [[ARRAYIDX]], align 2 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK2-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK2-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]]) +// CHECK2-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void @@ -2255,6 +2377,7 @@ // CHECK2-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 @@ -2264,9 +2387,11 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP3]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR3]] +// CHECK2-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -2274,108 +2399,120 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_5]], align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK2-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 10 // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to i8* -// CHECK2-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to i8* -// CHECK2-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP16:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 -// CHECK2-NEXT: [[TMP17:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP17]], i64 3) +// CHECK2-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK2-NEXT: store i64 [[TMP12]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP14]], i64* [[TMP13]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: store i64 [[TMP16]], i64* [[TMP15]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP17]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = bitcast %struct.anon.5* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK2-NEXT: [[TMP19:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 24) +// CHECK2-NEXT: [[TMP20:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP18]], i8* [[TMP19]]) +// CHECK2-NEXT: [[TMP21:%.*]] = load [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP20]] to %struct.anon.5* +// CHECK2-NEXT: store [[STRUCT_ANON_5]] [[TMP21]], %struct.anon.5* [[TMP22]], align 8 +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.5*)* @__omp_outlined__5 to i8*), i8* null, i8* [[TMP20]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[TMP19]], i64 24) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP29]], 9 // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK2: cond.true5: // CHECK2-NEXT: br label [[COND_END7:%.*]] // CHECK2: cond.false6: -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END7]] // CHECK2: cond.end7: -// CHECK2-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] +// CHECK2-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP30]], [[COND_FALSE6]] ] // CHECK2-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2385,56 +2522,62 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK2-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK2-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK2-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK2-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK2-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[CONV2:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP7]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CONV2:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK2-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP13]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK2-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK2-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]]) // CHECK2-NEXT: ret void // // @@ -2443,7 +2586,7 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 // CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 @@ -2455,13 +2598,14 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[F_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[F_CASTED]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store [10 x [10 x i32]]* [[TMP0]], [10 x [10 x i32]]** [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i64 [[TMP4]]) #[[ATTR3]] +// CHECK2-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -2469,12 +2613,12 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__6 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 +// CHECK2-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2482,110 +2626,118 @@ // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_8]], align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* +// CHECK2-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[F]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 99 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP10]], 100 // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[F_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP11]], i32* [[CONV3]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i64, i64* [[F_CASTED]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to i8* -// CHECK2-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to i8* -// CHECK2-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 -// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP18:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK2-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP12]] to i8* -// CHECK2-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x [10 x i32]]*, i64)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP21]], i64 4) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK2-NEXT: store i64 [[TMP12]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 +// CHECK2-NEXT: store i64 [[TMP14]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP16]], i64* [[TMP15]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: store i64 [[TMP18]], i64* [[TMP17]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store [10 x [10 x i32]]* [[TMP2]], [10 x [10 x i32]]** [[TMP19]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[F]], align 4 +// CHECK2-NEXT: store i32 [[TMP21]], i32* [[TMP20]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = bitcast %struct.anon.8* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK2-NEXT: [[TMP23:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 32) +// CHECK2-NEXT: [[TMP24:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP22]], i8* [[TMP23]]) +// CHECK2-NEXT: [[TMP25:%.*]] = load [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK2-NEXT: [[TMP26:%.*]] = bitcast i8* [[TMP24]] to %struct.anon.8* +// CHECK2-NEXT: store [[STRUCT_ANON_8]] [[TMP25]], %struct.anon.8* [[TMP26]], align 1 +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.8*)* @__omp_outlined__7 to i8*), i8* null, i8* [[TMP24]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[TMP23]], i64 32) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK2-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK2-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP28]], 99 -// CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] -// CHECK2: cond.true7: -// CHECK2-NEXT: br label [[COND_END9:%.*]] -// CHECK2: cond.false8: -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: br label [[COND_END9]] -// CHECK2: cond.end9: -// CHECK2-NEXT: [[COND10:%.*]] = phi i32 [ 99, [[COND_TRUE7]] ], [ [[TMP29]], [[COND_FALSE8]] ] -// CHECK2-NEXT: store i32 [[COND10]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP30]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP33]], 99 +// CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK2: cond.true6: +// CHECK2-NEXT: br label [[COND_END8:%.*]] +// CHECK2: cond.false7: +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END8]] +// CHECK2: cond.end8: +// CHECK2-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP34]], [[COND_FALSE7]] ] +// CHECK2-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP35]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__7 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2598,76 +2750,83 @@ // CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* +// CHECK2-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK2-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK2-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK2-NEXT: store i32 [[TMP8]], i32* [[F]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK2-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK2-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK2-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[CONV4:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV4]], [[TMP7]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CONV3:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV3]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 10 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[DIV5:%.*]] = sdiv i32 [[TMP10]], 10 -// CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[DIV5]], 10 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL6]] -// CHECK2-NEXT: [[MUL7:%.*]] = mul nsw i32 [[SUB]], 1 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL7]] -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[J]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP18]], 10 +// CHECK2-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL5]] +// CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[J]], align 4 // CHECK2-NEXT: store i32 10, i32* [[K]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK2-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP11]], [[MUL9]] -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 -// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD10]], [[TMP14]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 -// CHECK2-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM12]] -// CHECK2-NEXT: store i32 [[ADD11]], i32* [[ARRAYIDX13]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[J]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[F]], align 4 +// CHECK2-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[MUL8]] +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[K]], align 4 +// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP22]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[J]], align 4 +// CHECK2-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK2-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] +// CHECK2-NEXT: store i32 [[ADD10]], i32* [[ARRAYIDX12]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK2-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]]) // CHECK2-NEXT: ret void // // @@ -2676,7 +2835,7 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 @@ -2688,13 +2847,14 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store [10 x [10 x i32]]* [[TMP0]], [10 x [10 x i32]]** [[TMP5]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR3]] +// CHECK2-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -2702,12 +2862,12 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__8 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2720,145 +2880,153 @@ // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[I8:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[J9:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_11]], align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK2-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK2-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], [[DIV5]] // CHECK2-NEXT: [[SUB6:%.*]] = sub nsw i32 [[MUL]], 1 // CHECK2-NEXT: store i32 [[SUB6]], i32* [[DOTCAPTURE_EXPR_3]], align 4 // CHECK2-NEXT: store i32 0, i32* [[I]], align 4 // CHECK2-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK2-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: land.lhs.true: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[CMP7:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[CMP7:%.*]] = icmp slt i32 0, [[TMP10]] // CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK2-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP10]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK2-NEXT: [[CMP11:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK2-NEXT: [[CMP11:%.*]] = icmp slt i32 [[TMP19]], [[ADD]] // CHECK2-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK2-NEXT: [[CONV12:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP21]], i32* [[CONV12]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP18]] to i8* -// CHECK2-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 -// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP20]] to i8* -// CHECK2-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 -// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP28:%.*]] = inttoptr i64 [[TMP22]] to i8* -// CHECK2-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 -// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK2-NEXT: [[TMP30:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8 -// CHECK2-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [10 x [10 x i32]]*)* @__omp_outlined__9 to i8*), i8* null, i8** [[TMP33]], i64 4) +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK2-NEXT: store i64 [[TMP22]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK2-NEXT: store i64 [[TMP24]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP26]], i64* [[TMP25]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: store i64 [[TMP28]], i64* [[TMP27]], align 8 +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP30]], i32* [[TMP29]], align 8 +// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store [10 x [10 x i32]]* [[TMP4]], [10 x [10 x i32]]** [[TMP31]], align 8 +// CHECK2-NEXT: [[TMP32:%.*]] = bitcast %struct.anon.11* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK2-NEXT: [[TMP33:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 32) +// CHECK2-NEXT: [[TMP34:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP32]], i8* [[TMP33]]) +// CHECK2-NEXT: [[TMP35:%.*]] = load [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK2-NEXT: [[TMP36:%.*]] = bitcast i8* [[TMP34]] to %struct.anon.11* +// CHECK2-NEXT: store [[STRUCT_ANON_11]] [[TMP35]], %struct.anon.11* [[TMP36]], align 8 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP38]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.11*)* @__omp_outlined__9 to i8*), i8* null, i8* [[TMP34]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[TMP33]], i64 32) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] -// CHECK2-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] -// CHECK2-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] -// CHECK2-NEXT: br i1 [[CMP16]], label [[COND_TRUE17:%.*]], label [[COND_FALSE18:%.*]] -// CHECK2: cond.true17: -// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: br label [[COND_END19:%.*]] -// CHECK2: cond.false18: +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] +// CHECK2-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: br label [[COND_END19]] -// CHECK2: cond.end19: -// CHECK2-NEXT: [[COND20:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE17]] ], [ [[TMP43]], [[COND_FALSE18]] ] -// CHECK2-NEXT: store i32 [[COND20]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP44]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK2-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[CMP15:%.*]] = icmp sgt i32 [[TMP45]], [[TMP46]] +// CHECK2-NEXT: br i1 [[CMP15]], label [[COND_TRUE16:%.*]], label [[COND_FALSE17:%.*]] +// CHECK2: cond.true16: +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: br label [[COND_END18:%.*]] +// CHECK2: cond.false17: +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END18]] +// CHECK2: cond.end18: +// CHECK2-NEXT: [[COND19:%.*]] = phi i32 [ [[TMP47]], [[COND_TRUE16]] ], [ [[TMP48]], [[COND_FALSE17]] ] +// CHECK2-NEXT: store i32 [[COND19]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP49]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP46]]) +// CHECK2-NEXT: [[TMP50:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP51:%.*]] = load i32, i32* [[TMP50]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP51]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__9 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2871,113 +3039,120 @@ // CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I10:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[J11:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK2-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], %struct.anon.11* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK2-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK2-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK2-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[TMP7]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK2-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], [[DIV5]] // CHECK2-NEXT: [[SUB6:%.*]] = sub nsw i32 [[MUL]], 1 // CHECK2-NEXT: store i32 [[SUB6]], i32* [[DOTCAPTURE_EXPR_3]], align 4 // CHECK2-NEXT: store i32 0, i32* [[I]], align 4 // CHECK2-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK2-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: land.lhs.true: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[CMP7:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[CMP7:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK2-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP9]] to i32 -// CHECK2-NEXT: store i32 [[CONV8]], i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[CONV9]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP16]] to i32 +// CHECK2-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK2-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[CONV8]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP11]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP19]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[CONV12:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK2-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP13:%.*]] = icmp ule i64 [[CONV12]], [[TMP14]] -// CHECK2-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CONV11:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK2-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CMP12:%.*]] = icmp ule i64 [[CONV11]], [[TMP22]] +// CHECK2-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP16]], 0 -// CHECK2-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 -// CHECK2-NEXT: [[MUL16:%.*]] = mul nsw i32 1, [[DIV15]] -// CHECK2-NEXT: [[DIV17:%.*]] = sdiv i32 [[TMP15]], [[MUL16]] -// CHECK2-NEXT: [[MUL18:%.*]] = mul nsw i32 [[DIV17]], 1 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL18]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I10]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP19]], 0 -// CHECK2-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 -// CHECK2-NEXT: [[MUL21:%.*]] = mul nsw i32 1, [[DIV20]] -// CHECK2-NEXT: [[DIV22:%.*]] = sdiv i32 [[TMP18]], [[MUL21]] -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[SUB23:%.*]] = sub nsw i32 [[TMP20]], 0 -// CHECK2-NEXT: [[DIV24:%.*]] = sdiv i32 [[SUB23]], 1 -// CHECK2-NEXT: [[MUL25:%.*]] = mul nsw i32 1, [[DIV24]] -// CHECK2-NEXT: [[MUL26:%.*]] = mul nsw i32 [[DIV22]], [[MUL25]] -// CHECK2-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP17]], [[MUL26]] -// CHECK2-NEXT: [[MUL28:%.*]] = mul nsw i32 [[SUB27]], 1 -// CHECK2-NEXT: [[ADD29:%.*]] = add nsw i32 0, [[MUL28]] -// CHECK2-NEXT: store i32 [[ADD29]], i32* [[J11]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[I10]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[J11]], align 4 -// CHECK2-NEXT: [[ADD30:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[I10]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[J11]], align 4 -// CHECK2-NEXT: [[IDXPROM31:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK2-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM31]] -// CHECK2-NEXT: store i32 [[ADD30]], i32* [[ARRAYIDX32]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK2-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK2-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK2-NEXT: [[DIV16:%.*]] = sdiv i32 [[TMP23]], [[MUL15]] +// CHECK2-NEXT: [[MUL17:%.*]] = mul nsw i32 [[DIV16]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL17]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I9]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[SUB18:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK2-NEXT: [[DIV19:%.*]] = sdiv i32 [[SUB18]], 1 +// CHECK2-NEXT: [[MUL20:%.*]] = mul nsw i32 1, [[DIV19]] +// CHECK2-NEXT: [[DIV21:%.*]] = sdiv i32 [[TMP26]], [[MUL20]] +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK2-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 +// CHECK2-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] +// CHECK2-NEXT: [[MUL25:%.*]] = mul nsw i32 [[DIV21]], [[MUL24]] +// CHECK2-NEXT: [[SUB26:%.*]] = sub nsw i32 [[TMP25]], [[MUL25]] +// CHECK2-NEXT: [[MUL27:%.*]] = mul nsw i32 [[SUB26]], 1 +// CHECK2-NEXT: [[ADD28:%.*]] = add nsw i32 0, [[MUL27]] +// CHECK2-NEXT: store i32 [[ADD28]], i32* [[J10]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[I9]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[J10]], align 4 +// CHECK2-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[I9]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP8]], i64 0, i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[J10]], align 4 +// CHECK2-NEXT: [[IDXPROM30:%.*]] = sext i32 [[TMP32]] to i64 +// CHECK2-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM30]] +// CHECK2-NEXT: store i32 [[ADD29]], i32* [[ARRAYIDX31]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD33:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK2-NEXT: store i32 [[ADD33]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD32:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK2-NEXT: store i32 [[ADD32]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP28]]) +// CHECK2-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP36]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void @@ -2989,7 +3164,7 @@ // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 // CHECK2-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 @@ -3002,14 +3177,17 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[V_ADDR]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store [1000 x i32]* [[TMP0]], [1000 x i32]** [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[V_ADDR]], align 8 +// CHECK2-NEXT: store i32* [[TMP7]], i32** [[TMP6]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i32* [[TMP5]]) #[[ATTR3]] +// CHECK2-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -3017,13 +3195,12 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__10 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[V:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK2-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3033,139 +3210,145 @@ // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_13]], align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32*, i32** [[V_ADDR]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP15]] to i8* -// CHECK2-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 8 -// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP17]] to i8* -// CHECK2-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 -// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP19]] to i8* -// CHECK2-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 -// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK2-NEXT: [[TMP28:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 -// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK2-NEXT: [[TMP30:%.*]] = bitcast i32* [[TMP20]] to i8* -// CHECK2-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8 -// CHECK2-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i32*)* @__omp_outlined__11 to i8*), i8* null, i8** [[TMP33]], i64 5) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK2-NEXT: store i64 [[TMP20]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK2-NEXT: store i64 [[TMP22]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP24]], i64* [[TMP23]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: store i64 [[TMP26]], i64* [[TMP25]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP28]], i32* [[TMP27]], align 8 +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store [1000 x i32]* [[TMP4]], [1000 x i32]** [[TMP29]], align 8 +// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK2-NEXT: store i32* [[TMP31]], i32** [[TMP30]], align 8 +// CHECK2-NEXT: [[TMP32:%.*]] = bitcast %struct.anon.13* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK2-NEXT: [[TMP33:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 40) +// CHECK2-NEXT: [[TMP34:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP32]], i8* [[TMP33]]) +// CHECK2-NEXT: [[TMP35:%.*]] = load [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK2-NEXT: [[TMP36:%.*]] = bitcast i8* [[TMP34]] to %struct.anon.13* +// CHECK2-NEXT: store [[STRUCT_ANON_13]] [[TMP35]], %struct.anon.13* [[TMP36]], align 8 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP38]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.13*)* @__omp_outlined__11 to i8*), i8* null, i8* [[TMP34]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[TMP33]], i64 40) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] -// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] -// CHECK2-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] -// CHECK2: cond.true11: -// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: br label [[COND_END13:%.*]] -// CHECK2: cond.false12: +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] +// CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: br label [[COND_END13]] -// CHECK2: cond.end13: -// CHECK2-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE11]] ], [ [[TMP43]], [[COND_FALSE12]] ] -// CHECK2-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP44]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP45]], [[TMP46]] +// CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK2: cond.true10: +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: br label [[COND_END12:%.*]] +// CHECK2: cond.false11: +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END12]] +// CHECK2: cond.end12: +// CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP47]], [[COND_TRUE10]] ], [ [[TMP48]], [[COND_FALSE11]] ] +// CHECK2-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP49]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP46]]) +// CHECK2-NEXT: [[TMP50:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP51:%.*]] = load i32, i32* [[TMP50]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP51]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__11 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[V:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK2-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3175,80 +3358,87 @@ // CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK2-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK2-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK2-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP7]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK2-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK2-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP17]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[CONV6:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP7:%.*]] = icmp ule i64 [[CONV6]], [[TMP11]] -// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CONV5:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK2-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP20]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[V_ADDR]], align 8 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK2-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM8]] -// CHECK2-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX9]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP22]], i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK2-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP8]], i64 0, i64 [[IDXPROM7]] +// CHECK2-NEXT: store i32 [[TMP24]], i32* [[ARRAYIDX8]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK2-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]]) +// CHECK2-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void @@ -3260,8 +3450,7 @@ // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 // CHECK3-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 @@ -3273,15 +3462,17 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4:[0-9]+]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[L_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[L_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[L_CASTED]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store [1000 x i32]* [[TMP0]], [1000 x i32]** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] +// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -3289,13 +3480,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 noundef [[L:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK3-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3305,130 +3496,138 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[L]], align 4 // CHECK3-NEXT: [[L1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) // CHECK3-NEXT: [[L_ON_STACK:%.*]] = bitcast i8* [[L1]] to i32* -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP18]], [[ADD]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[L_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP18]], i32* [[L_CASTED]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[L_CASTED]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP14]] to i8* -// CHECK3-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP15]] to i8* -// CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP17]] to i8* -// CHECK3-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP27:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 -// CHECK3-NEXT: [[TMP29:%.*]] = inttoptr i32 [[TMP19]] to i8* -// CHECK3-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP32]], i32 5) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], i32* [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store [1000 x i32]* [[TMP4]], [1000 x i32]** [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[L]], align 4 +// CHECK3-NEXT: store i32 [[TMP30]], i32* [[TMP29]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK3-NEXT: [[TMP32:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 20) +// CHECK3-NEXT: [[TMP33:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP31]], i8* [[TMP32]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = bitcast i8* [[TMP33]] to %struct.anon.0* +// CHECK3-NEXT: store [[STRUCT_ANON_0]] [[TMP34]], %struct.anon.0* [[TMP35]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP36]], align 4 +// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP37]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__1 to i8*), i8* null, i8* [[TMP33]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[TMP32]], i32 20) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] // CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] // CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP44]], [[TMP45]] // CHECK3-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK3: cond.true11: -// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: br label [[COND_END13:%.*]] // CHECK3: cond.false12: -// CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END13]] // CHECK3: cond.end13: -// CHECK3-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE11]] ], [ [[TMP42]], [[COND_FALSE12]] ] +// CHECK3-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP46]], [[COND_TRUE11]] ], [ [[TMP47]], [[COND_FALSE12]] ] // CHECK3-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP43]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP48]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP45]]) -// CHECK3-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 -// CHECK3-NEXT: br i1 [[TMP47]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: [[TMP49:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP50:%.*]] = load i32, i32* [[TMP49]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP50]]) +// CHECK3-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP52:%.*]] = icmp ne i32 [[TMP51]], 0 +// CHECK3-NEXT: br i1 [[TMP52]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP48:%.*]] = load i32, i32* [[L_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP48]], i32* [[L_ADDR]], align 4 +// CHECK3-NEXT: [[TMP53:%.*]] = load i32, i32* [[L]], align 4 +// CHECK3-NEXT: store i32 [[TMP53]], i32* [[L]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: br label [[OMP_PRECOND_END]] @@ -3438,15 +3637,15 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 noundef [[L:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK3-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3459,104 +3658,114 @@ // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[L]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 32) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP18]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 32) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP8]], i32 0, i32 [[TMP29]] // CHECK3-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK3-NEXT: store i32 [[TMP20]], i32* [[L_ADDR]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: store i32 [[TMP30]], i32* [[L]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], 1 // CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP27]]) -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP36]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP37]]) +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK3-NEXT: br i1 [[TMP39]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[L_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP30]], i32* [[L_ADDR]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, i32* [[L]], align 4 +// CHECK3-NEXT: store i32 [[TMP40]], i32* [[L]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: br label [[OMP_PRECOND_END]] @@ -3569,7 +3778,7 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 @@ -3580,12 +3789,14 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store [1000 x i16]* [[TMP0]], [1000 x i16]** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR3]] +// CHECK3-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -3593,12 +3804,12 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], [1000 x i16]* noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3608,129 +3819,139 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 +// CHECK3-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_2]], align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load [1000 x i16]*, [1000 x i16]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP16]], [[ADD]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to i8* -// CHECK3-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to i8* -// CHECK3-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to i8* -// CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP25:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* -// CHECK3-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP27]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP28]], i32 4) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], i32* [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], i32* [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store [1000 x i16]* [[TMP4]], [1000 x i16]** [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = bitcast %struct.anon.2* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK3-NEXT: [[TMP28:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 16) +// CHECK3-NEXT: [[TMP29:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP27]], i8* [[TMP28]]) +// CHECK3-NEXT: [[TMP30:%.*]] = load [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = bitcast i8* [[TMP29]] to %struct.anon.2* +// CHECK3-NEXT: store [[STRUCT_ANON_2]] [[TMP30]], %struct.anon.2* [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP33]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.2*)* @__omp_outlined__3 to i8*), i8* null, i8* [[TMP29]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[TMP28]], i32 16) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] // CHECK3-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK3: cond.true10: -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END12:%.*]] // CHECK3: cond.false11: -// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END12]] // CHECK3: cond.end12: -// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] +// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE10]] ], [ [[TMP43]], [[COND_FALSE11]] ] // CHECK3-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP39]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP44]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP41]]) +// CHECK3-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP46]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], [1000 x i16]* noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3743,52 +3964,60 @@ // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load [1000 x i16]*, [1000 x i16]** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP16]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP8]], i32 0, i32 [[TMP21]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP22]] to i32 // CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 // CHECK3-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX]], align 2 @@ -3796,17 +4025,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -3816,6 +4045,7 @@ // CHECK3-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 @@ -3825,9 +4055,11 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP3]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR3]] +// CHECK3-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -3835,106 +4067,118 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 +// CHECK3-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_4]], align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 10 // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK3-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK3-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP14:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP15]], i32 3) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = bitcast %struct.anon.4* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK3-NEXT: [[TMP17:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 12) +// CHECK3-NEXT: [[TMP18:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP16]], i8* [[TMP17]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP18]] to %struct.anon.4* +// CHECK3-NEXT: store [[STRUCT_ANON_4]] [[TMP19]], %struct.anon.4* [[TMP20]], align 4 +// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.4*)* @__omp_outlined__5 to i8*), i8* null, i8* [[TMP18]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[TMP17]], i32 12) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP27]], 9 // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK3: cond.true5: // CHECK3-NEXT: br label [[COND_END7:%.*]] // CHECK3: cond.false6: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END7]] // CHECK3: cond.end7: -// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] +// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP28]], [[COND_FALSE6]] ] // CHECK3-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP29]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3944,52 +4188,58 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP15]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK3-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -3998,7 +4248,7 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 // CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 @@ -4009,12 +4259,14 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[F_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[F_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[F_CASTED]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store [10 x [10 x i32]]* [[TMP0]], [10 x [10 x i32]]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[F_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP4]]) #[[ATTR3]] +// CHECK3-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -4022,12 +4274,12 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__6 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 -// CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 +// CHECK3-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -4035,106 +4287,116 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[K:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 +// CHECK3-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_6]], align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[F]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP10]], 100 // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[F_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[F_CASTED]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[F_CASTED]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK3-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK3-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP16:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK3-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP18:%.*]] = inttoptr i32 [[TMP10]] to i8* -// CHECK3-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x [10 x i32]]*, i32)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP19]], i32 4) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [10 x [10 x i32]]* [[TMP2]], [10 x [10 x i32]]** [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[F]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = bitcast %struct.anon.6* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK3-NEXT: [[TMP21:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 16) +// CHECK3-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP20]], i8* [[TMP21]]) +// CHECK3-NEXT: [[TMP23:%.*]] = load [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP22]] to %struct.anon.6* +// CHECK3-NEXT: store [[STRUCT_ANON_6]] [[TMP23]], %struct.anon.6* [[TMP24]], align 4 +// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.6*)* @__omp_outlined__7 to i8*), i8* null, i8* [[TMP22]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[TMP21]], i32 16) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP31]], 99 // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK3: cond.true6: // CHECK3-NEXT: br label [[COND_END8:%.*]] // CHECK3: cond.false7: -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END8]] // CHECK3: cond.end8: -// CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ] +// CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP32]], [[COND_FALSE7]] ] // CHECK3-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__7 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 -// CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -4147,70 +4409,78 @@ // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[F]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 10 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP18]], 10 // CHECK3-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 10 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL3]] +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL3]] // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] // CHECK3-NEXT: store i32 [[ADD5]], i32* [[J]], align 4 // CHECK3-NEXT: store i32 10, i32* [[K]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[F_ADDR]], align 4 -// CHECK3-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[MUL6]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP14]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i32 0, i32 [[TMP15]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 -// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[J]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[F]], align 4 +// CHECK3-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], [[MUL6]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[K]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP22]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP6]], i32 0, i32 [[TMP23]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[J]], align 4 +// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP24]] // CHECK3-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX9]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]]) // CHECK3-NEXT: ret void // // @@ -4219,7 +4489,7 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 @@ -4230,12 +4500,14 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store [10 x [10 x i32]]* [[TMP0]], [10 x [10 x i32]]** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR3]] +// CHECK3-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -4243,12 +4515,12 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__8 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -4261,25 +4533,31 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I9:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[J10:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 +// CHECK3-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_8]], align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK3-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 // CHECK3-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] @@ -4287,120 +4565,124 @@ // CHECK3-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 // CHECK3-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: land.lhs.true: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP10]] // CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i64 0, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK3-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_COMB_UB]], align 8 // CHECK3-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() // CHECK3-NEXT: [[CONV11:%.*]] = zext i32 [[NVPTX_NUM_THREADS]] to i64 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_init_8(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 [[CONV11]]) -// CHECK3-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK3-NEXT: [[CMP12:%.*]] = icmp sgt i64 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_8(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], i32 91, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 [[CONV11]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: [[CMP12:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP12]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK3-NEXT: store i64 [[COND]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK3-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK3-NEXT: store i64 [[TMP14]], i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP18]], i64* [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP16]], 1 -// CHECK3-NEXT: [[CMP13:%.*]] = icmp slt i64 [[TMP15]], [[ADD]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP20]], 1 +// CHECK3-NEXT: [[CMP13:%.*]] = icmp slt i64 [[TMP19]], [[ADD]] // CHECK3-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = trunc i64 [[TMP17]] to i32 -// CHECK3-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK3-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP21]], i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP18]] to i8* -// CHECK3-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP26:%.*]] = inttoptr i32 [[TMP20]] to i8* -// CHECK3-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP28:%.*]] = inttoptr i32 [[TMP22]] to i8* -// CHECK3-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP30:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK3-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [10 x [10 x i32]]*)* @__omp_outlined__9 to i8*), i8* null, i8** [[TMP33]], i32 4) +// CHECK3-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK3-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i32 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: [[TMP24:%.*]] = trunc i64 [[TMP23]] to i32 +// CHECK3-NEXT: store i32 [[TMP24]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], i32* [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP28]], i32* [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP30]], i32* [[TMP29]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store [10 x [10 x i32]]* [[TMP4]], [10 x [10 x i32]]** [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = bitcast %struct.anon.8* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK3-NEXT: [[TMP33:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 16) +// CHECK3-NEXT: [[TMP34:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP32]], i8* [[TMP33]]) +// CHECK3-NEXT: [[TMP35:%.*]] = load [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = bitcast i8* [[TMP34]] to %struct.anon.8* +// CHECK3-NEXT: store [[STRUCT_ANON_8]] [[TMP35]], %struct.anon.8* [[TMP36]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP38]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.8*)* @__omp_outlined__9 to i8*), i8* null, i8* [[TMP34]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[TMP33]], i32 16) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP35:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP34]], [[TMP35]] +// CHECK3-NEXT: [[TMP39:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP40:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP39]], [[TMP40]] // CHECK3-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK3-NEXT: [[TMP37:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK3-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP36]], [[TMP37]] +// CHECK3-NEXT: [[TMP41:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK3-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK3-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP41]], [[TMP42]] // CHECK3-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK3-NEXT: [[TMP38:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK3-NEXT: [[TMP39:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], [[TMP39]] +// CHECK3-NEXT: [[TMP43:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: [[TMP44:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP43]], [[TMP44]] // CHECK3-NEXT: store i64 [[ADD16]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK3-NEXT: [[TMP40:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK3-NEXT: [[TMP41:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK3-NEXT: [[CMP17:%.*]] = icmp sgt i64 [[TMP40]], [[TMP41]] +// CHECK3-NEXT: [[TMP45:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: [[TMP46:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: [[CMP17:%.*]] = icmp sgt i64 [[TMP45]], [[TMP46]] // CHECK3-NEXT: br i1 [[CMP17]], label [[COND_TRUE18:%.*]], label [[COND_FALSE19:%.*]] // CHECK3: cond.true18: -// CHECK3-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: [[TMP47:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK3-NEXT: br label [[COND_END20:%.*]] // CHECK3: cond.false19: -// CHECK3-NEXT: [[TMP43:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: [[TMP48:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 // CHECK3-NEXT: br label [[COND_END20]] // CHECK3: cond.end20: -// CHECK3-NEXT: [[COND21:%.*]] = phi i64 [ [[TMP42]], [[COND_TRUE18]] ], [ [[TMP43]], [[COND_FALSE19]] ] +// CHECK3-NEXT: [[COND21:%.*]] = phi i64 [ [[TMP47]], [[COND_TRUE18]] ], [ [[TMP48]], [[COND_FALSE19]] ] // CHECK3-NEXT: store i64 [[COND21]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK3-NEXT: [[TMP44:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK3-NEXT: store i64 [[TMP44]], i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP49:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP49]], i64* [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP46]]) +// CHECK3-NEXT: [[TMP50:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP51:%.*]] = load i32, i32* [[TMP50]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP51]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__9 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -4417,21 +4699,29 @@ // CHECK3-NEXT: [[J12:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK3-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 // CHECK3-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] @@ -4439,91 +4729,91 @@ // CHECK3-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 // CHECK3-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: land.lhs.true: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK3-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[CONV9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CONV10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: store i64 [[TMP15]], i64* [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[CONV9:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[CONV10:%.*]] = zext i32 [[TMP17]] to i64 // CHECK3-NEXT: store i64 [[CONV9]], i64* [[DOTOMP_LB]], align 8 // CHECK3-NEXT: store i64 [[CONV10]], i64* [[DOTOMP_UB]], align 8 // CHECK3-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB3]], i32 [[TMP11]], i32 33, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK3-NEXT: store i64 [[TMP12]], i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB3]], i32 [[TMP19]], i32 33, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK3-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP20]], i64* [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CONV13:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK3-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP13]], [[CONV13]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[CONV13:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK3-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP21]], [[CONV13]] // CHECK3-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP16]], 0 +// CHECK3-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP24]], 0 // CHECK3-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK3-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] // CHECK3-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 -// CHECK3-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP15]], [[CONV18]] +// CHECK3-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP23]], [[CONV18]] // CHECK3-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] // CHECK3-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 // CHECK3-NEXT: store i32 [[CONV21]], i32* [[I11]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP19]], 0 +// CHECK3-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK3-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 // CHECK3-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] // CHECK3-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 -// CHECK3-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP18]], [[CONV25]] -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP20]], 0 +// CHECK3-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP26]], [[CONV25]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP28]], 0 // CHECK3-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 // CHECK3-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] // CHECK3-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 // CHECK3-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] -// CHECK3-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP17]], [[MUL31]] +// CHECK3-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP25]], [[MUL31]] // CHECK3-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 // CHECK3-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] // CHECK3-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 // CHECK3-NEXT: store i32 [[CONV35]], i32* [[J12]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I11]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[J12]], align 4 -// CHECK3-NEXT: [[ADD36:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[I11]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i32 0, i32 [[TMP23]] -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[J12]], align 4 -// CHECK3-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP24]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[I11]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[J12]], align 4 +// CHECK3-NEXT: [[ADD36:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[I11]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP8]], i32 0, i32 [[TMP31]] +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[J12]], align 4 +// CHECK3-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP32]] // CHECK3-NEXT: store i32 [[ADD36]], i32* [[ARRAYIDX37]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK3-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK3-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP33]], [[TMP34]] // CHECK3-NEXT: store i64 [[ADD38]], i64* [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP28]]) +// CHECK3-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP36]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -4535,7 +4825,7 @@ // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 // CHECK3-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 @@ -4547,13 +4837,17 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[V_ADDR]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store [1000 x i32]* [[TMP0]], [1000 x i32]** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[V_ADDR]], align 4 +// CHECK3-NEXT: store i32* [[TMP7]], i32** [[TMP6]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32* [[TMP5]]) #[[ATTR3]] +// CHECK3-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -4561,13 +4855,12 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__10 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[V:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK3-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4577,135 +4870,143 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 +// CHECK3-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_10]], align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32*, i32** [[V_ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP20:%.*]] = inttoptr i32 [[TMP14]] to i8* -// CHECK3-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP22:%.*]] = inttoptr i32 [[TMP15]] to i8* -// CHECK3-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP17]] to i8* -// CHECK3-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP26:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 -// CHECK3-NEXT: [[TMP28:%.*]] = bitcast i32* [[TMP18]] to i8* -// CHECK3-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32*)* @__omp_outlined__11 to i8*), i8* null, i8** [[TMP31]], i32 5) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP24]], i32* [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], i32* [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store [1000 x i32]* [[TMP4]], [1000 x i32]** [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK3-NEXT: store i32* [[TMP29]], i32** [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = bitcast %struct.anon.10* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK3-NEXT: [[TMP31:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 20) +// CHECK3-NEXT: [[TMP32:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP30]], i8* [[TMP31]]) +// CHECK3-NEXT: [[TMP33:%.*]] = load [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = bitcast i8* [[TMP32]] to %struct.anon.10* +// CHECK3-NEXT: store [[STRUCT_ANON_10]] [[TMP33]], %struct.anon.10* [[TMP34]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP36]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.10*)* @__omp_outlined__11 to i8*), i8* null, i8* [[TMP32]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[TMP31]], i32 20) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] // CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP43]], [[TMP44]] // CHECK3-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK3: cond.true10: -// CHECK3-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END12:%.*]] // CHECK3: cond.false11: -// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END12]] // CHECK3: cond.end12: -// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE10]] ], [ [[TMP41]], [[COND_FALSE11]] ] +// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP45]], [[COND_TRUE10]] ], [ [[TMP46]], [[COND_FALSE11]] ] // CHECK3-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP42]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP47]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP44]]) +// CHECK3-NEXT: [[TMP48:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP49:%.*]] = load i32, i32* [[TMP48]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP49]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__11 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[V:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK3-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4718,71 +5019,79 @@ // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP17]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP19]], [[TMP20]] // CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32*, i32** [[V_ADDR]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i32 [[TMP14]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX5]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP22]], i32 [[TMP23]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP8]], i32 0, i32 [[TMP25]] +// CHECK3-NEXT: store i32 [[TMP24]], i32* [[ARRAYIDX5]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]]) +// CHECK3-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp @@ -35,8 +35,7 @@ // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 @@ -50,17 +49,17 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4:[0-9]+]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARGC_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[ARGC_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[TMP0]], i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], i32* [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -68,272 +67,284 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[ARGC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARGC]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP18]], [[ADD]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[ARGC_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[ARGC_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP15]] to i8* -// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP17]] to i8* -// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP19]] to i8* -// CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK1-NEXT: [[TMP31:%.*]] = inttoptr i64 [[TMP21]] to i8* -// CHECK1-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP33]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, i32*, i64)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP34]], i64 5) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP25]], i64* [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP27]], i64* [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[ARGC]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store i32* [[TMP4]], i32** [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: store i32 [[TMP32]], i32* [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK1-NEXT: [[TMP34:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 40) +// CHECK1-NEXT: [[TMP35:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP33]], i8* [[TMP34]]) +// CHECK1-NEXT: [[TMP36:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK1-NEXT: [[TMP37:%.*]] = bitcast i8* [[TMP35]] to %struct.anon.0* +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP36]], %struct.anon.0* [[TMP37]], align 1 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP39]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__1 to i8*), i8* null, i8* [[TMP35]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP34]], i64 40) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] -// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] -// CHECK1-NEXT: br i1 [[CMP13]], label [[COND_TRUE14:%.*]], label [[COND_FALSE15:%.*]] -// CHECK1: cond.true14: -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: br label [[COND_END16:%.*]] -// CHECK1: cond.false15: +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END16]] -// CHECK1: cond.end16: -// CHECK1-NEXT: [[COND17:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE14]] ], [ [[TMP44]], [[COND_FALSE15]] ] -// CHECK1-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP45]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] +// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP46]], [[TMP47]] +// CHECK1-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] +// CHECK1: cond.true11: +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: br label [[COND_END13:%.*]] +// CHECK1: cond.false12: +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END13]] +// CHECK1: cond.end13: +// CHECK1-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP48]], [[COND_TRUE11]] ], [ [[TMP49]], [[COND_FALSE12]] ] +// CHECK1-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP50]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP47]]) +// CHECK1-NEXT: [[TMP51:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[TMP51]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP52]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[ARGC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I7:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARGC]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP16]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP9]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP19]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP17]]) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP11]] to i32 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP10]], [[CONV8]] -// CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP21]] to i32 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP20]], [[CONV6]] +// CHECK1-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV10:%.*]] = trunc i64 [[TMP12]] to i32 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP22]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV10]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I7]], align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[I7]]) #[[ATTR5:[0-9]+]] -// CHECK1-NEXT: [[CALL13:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[TMP0]]) #[[ATTR5]] -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[CALL]], [[CALL13]] -// CHECK1-NEXT: [[CALL15:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[CONV]]) #[[ATTR5]] -// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD14]], [[CALL15]] -// CHECK1-NEXT: store i32 [[ADD16]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[I5]]) #[[ATTR7:[0-9]+]] +// CHECK1-NEXT: [[CALL11:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[TMP8]]) #[[ATTR7]] +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[CALL]], [[CALL11]] +// CHECK1-NEXT: [[CALL13:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[ARGC]]) #[[ATTR7]] +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[ADD12]], [[CALL13]] +// CHECK1-NEXT: store i32 [[ADD14]], i32* [[TMP8]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK1-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: store i32 [[ADD16]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK1-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]]) +// CHECK1-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP36]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -345,8 +356,7 @@ // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 @@ -358,15 +368,17 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4:[0-9]+]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP3]], i32* [[ARGC_CASTED]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARGC_CASTED]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store i32* [[TMP0]], i32** [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], i32* [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] +// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -374,13 +386,13 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[ARGC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK2-NEXT: [[ARGC:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -390,138 +402,146 @@ // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[ARGC]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARGC]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP18]], [[ADD]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP16]], i32* [[ARGC_CASTED]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARGC_CASTED]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP18]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP14]] to i8* -// CHECK2-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP15]] to i8* -// CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK2-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP17]] to i8* -// CHECK2-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK2-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 4 -// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 -// CHECK2-NEXT: [[TMP29:%.*]] = inttoptr i32 [[TMP19]] to i8* -// CHECK2-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 4 -// CHECK2-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 -// CHECK2-NEXT: [[TMP32:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, i32*, i32)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP32]], i32 5) +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP23]], i32* [[TMP22]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARGC]], align 4 +// CHECK2-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store i32* [[TMP4]], i32** [[TMP28]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: store i32 [[TMP30]], i32* [[TMP29]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK2-NEXT: [[TMP32:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 20) +// CHECK2-NEXT: [[TMP33:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP31]], i8* [[TMP32]]) +// CHECK2-NEXT: [[TMP34:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = bitcast i8* [[TMP33]] to %struct.anon.0* +// CHECK2-NEXT: store [[STRUCT_ANON_0]] [[TMP34]], %struct.anon.0* [[TMP35]], align 4 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP36]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP37]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__1 to i8*), i8* null, i8* [[TMP33]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[TMP32]], i32 20) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] // CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] // CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP44]], [[TMP45]] // CHECK2-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK2: cond.true11: -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: br label [[COND_END13:%.*]] // CHECK2: cond.false12: -// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END13]] // CHECK2: cond.end13: -// CHECK2-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE11]] ], [ [[TMP42]], [[COND_FALSE12]] ] +// CHECK2-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP46]], [[COND_TRUE11]] ], [ [[TMP47]], [[COND_FALSE12]] ] // CHECK2-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP43]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP48]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP45]]) +// CHECK2-NEXT: [[TMP49:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP50:%.*]] = load i32, i32* [[TMP49]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP50]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[ARGC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[ARGC:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -534,100 +554,110 @@ // CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], i32* [[ARGC]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARGC]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP9]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP19]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP17]]) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]] // CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[I4]]) #[[ATTR5:[0-9]+]] -// CHECK2-NEXT: [[CALL8:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[TMP0]]) #[[ATTR5]] +// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[I4]]) #[[ATTR7:[0-9]+]] +// CHECK2-NEXT: [[CALL8:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[TMP8]]) #[[ATTR7]] // CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[CALL]], [[CALL8]] -// CHECK2-NEXT: [[CALL10:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[ARGC_ADDR]]) #[[ATTR5]] +// CHECK2-NEXT: [[CALL10:%.*]] = call noundef i32 @_Z3fooPi(i32* noundef [[ARGC]]) #[[ATTR7]] // CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD9]], [[CALL10]] -// CHECK2-NEXT: store i32 [[ADD11]], i32* [[TMP0]], align 4 +// CHECK2-NEXT: store i32 [[ADD11]], i32* [[TMP8]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK2-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK2-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: -// CHECK2-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]]) +// CHECK2-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP36]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp @@ -65,8 +65,7 @@ // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 // CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 @@ -80,17 +79,17 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4:[0-9]+]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[L_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[L_CASTED]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [1000 x i32]* [[TMP0]], [1000 x i32]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -98,311 +97,323 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[L:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK1-NEXT: [[L2:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4) -// CHECK1-NEXT: [[L_ON_STACK:%.*]] = bitcast i8* [[L2]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[L]], align 4 +// CHECK1-NEXT: [[L1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4) +// CHECK1-NEXT: [[L_ON_STACK:%.*]] = bitcast i8* [[L1]] to i32* +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP18]], [[ADD]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[L_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[L_CASTED]], align 8, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP15]] to i8* -// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP17]] to i8* -// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP19]] to i8* -// CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP29:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 8, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK1-NEXT: [[TMP31:%.*]] = inttoptr i64 [[TMP21]] to i8* -// CHECK1-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 8, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP34:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP33]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i64)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP34]], i64 5), !llvm.access.group !12 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: store i64 [[TMP25]], i64* [[TMP24]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: store i64 [[TMP27]], i64* [[TMP26]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store [1000 x i32]* [[TMP4]], [1000 x i32]** [[TMP30]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[L]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: store i32 [[TMP32]], i32* [[TMP31]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK1-NEXT: [[TMP34:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 40), !llvm.access.group !12 +// CHECK1-NEXT: [[TMP35:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP33]], i8* [[TMP34]]), !llvm.access.group !12 +// CHECK1-NEXT: [[TMP36:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], align 1, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP37:%.*]] = bitcast i8* [[TMP35]] to %struct.anon.0* +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP36]], %struct.anon.0* [[TMP37]], align 1, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP39]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__1 to i8*), i8* null, i8* [[TMP35]]), !llvm.access.group !12 +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP34]], i64 40), !llvm.access.group !12 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] -// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] -// CHECK1-NEXT: br i1 [[CMP13]], label [[COND_TRUE14:%.*]], label [[COND_FALSE15:%.*]] -// CHECK1: cond.true14: -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: br label [[COND_END16:%.*]] -// CHECK1: cond.false15: +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: br label [[COND_END16]] -// CHECK1: cond.end16: -// CHECK1-NEXT: [[COND17:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE14]] ], [ [[TMP44]], [[COND_FALSE15]] ] -// CHECK1-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: store i32 [[TMP45]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] +// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP46]], [[TMP47]] +// CHECK1-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] +// CHECK1: cond.true11: +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: br label [[COND_END13:%.*]] +// CHECK1: cond.false12: +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: br label [[COND_END13]] +// CHECK1: cond.end13: +// CHECK1-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP48]], [[COND_TRUE11]] ], [ [[TMP49]], [[COND_FALSE12]] ] +// CHECK1-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: store i32 [[TMP50]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP47]]) -// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 -// CHECK1-NEXT: br i1 [[TMP49]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP51:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[TMP51]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP52]]) +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP54:%.*]] = icmp ne i32 [[TMP53]], 0 +// CHECK1-NEXT: br i1 [[TMP54]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB18:%.*]] = sub nsw i32 [[TMP50]], 0 -// CHECK1-NEXT: [[DIV19:%.*]] = sdiv i32 [[SUB18]], 1 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV19]], 1 -// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD20]], i32* [[I5]], align 4 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP55]], 0 +// CHECK1-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD17]], i32* [[I4]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP52:%.*]] = icmp ne i32 [[TMP51]], 0 -// CHECK1-NEXT: br i1 [[TMP52]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP56:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP57:%.*]] = icmp ne i32 [[TMP56]], 0 +// CHECK1-NEXT: br i1 [[TMP57]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP53:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: store i32 [[TMP53]], i32* [[CONV1]], align 4 +// CHECK1-NEXT: [[TMP58:%.*]] = load i32, i32* [[L]], align 4 +// CHECK1-NEXT: store i32 [[TMP58]], i32* [[L]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: -// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[L2]], i64 4) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[L1]], i64 4) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[L:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[L]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 32) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP18]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 32) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP10]] to i32 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP9]], [[CONV7]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP20]] to i32 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP19]], [[CONV5]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP11]] to i32 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP21]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV7]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 -// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !16 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !16 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !16 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !16 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP8]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !16 -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 4, !llvm.access.group !16 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !16 +// CHECK1-NEXT: store i32 [[TMP30]], i32* [[L]], align 4, !llvm.access.group !16 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP27]]) -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK1-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP36]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP37]]) +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK1-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP30]], 0 -// CHECK1-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK1-NEXT: [[MUL17:%.*]] = mul nsw i32 [[DIV16]], 1 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 0, [[MUL17]] -// CHECK1-NEXT: store i32 [[ADD18]], i32* [[I6]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP40]], 0 +// CHECK1-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK1-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] +// CHECK1-NEXT: store i32 [[ADD16]], i32* [[I4]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK1-NEXT: br i1 [[TMP42]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: store i32 [[TMP33]], i32* [[CONV1]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[L]], align 4 +// CHECK1-NEXT: store i32 [[TMP43]], i32* [[L]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -415,7 +426,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 @@ -427,13 +438,14 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store [1000 x i16]* [[TMP0]], [1000 x i16]** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -441,12 +453,12 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], [1000 x i16]* noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -456,129 +468,137 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_3]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load [1000 x i16]*, [1000 x i16]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP16]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !19 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP15]] to i8* -// CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8, !llvm.access.group !19 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP17]] to i8* -// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8, !llvm.access.group !19 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to i8* -// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8, !llvm.access.group !19 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP27:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8, !llvm.access.group !19 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !19 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: [[TMP30:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP29]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP30]], i64 4), !llvm.access.group !19 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i64 [[TMP19]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !19 +// CHECK1-NEXT: store i64 [[TMP23]], i64* [[TMP22]], align 8, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !19 +// CHECK1-NEXT: store i64 [[TMP25]], i64* [[TMP24]], align 8, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 8, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store [1000 x i16]* [[TMP4]], [1000 x i16]** [[TMP28]], align 8, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP29:%.*]] = bitcast %struct.anon.3* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK1-NEXT: [[TMP30:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 32), !llvm.access.group !19 +// CHECK1-NEXT: [[TMP31:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP29]], i8* [[TMP30]]), !llvm.access.group !19 +// CHECK1-NEXT: [[TMP32:%.*]] = load [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], align 8, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast i8* [[TMP31]] to %struct.anon.3* +// CHECK1-NEXT: store [[STRUCT_ANON_3]] [[TMP32]], %struct.anon.3* [[TMP33]], align 8, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP35]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.3*)* @__omp_outlined__3 to i8*), i8* null, i8* [[TMP31]]), !llvm.access.group !19 +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP30]], i64 32), !llvm.access.group !19 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]] -// CHECK1-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] -// CHECK1: cond.true11: -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: br label [[COND_END13:%.*]] -// CHECK1: cond.false12: +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 // CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: br label [[COND_END13]] -// CHECK1: cond.end13: -// CHECK1-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE11]] ], [ [[TMP40]], [[COND_FALSE12]] ] -// CHECK1-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: store i32 [[TMP41]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP42]], [[TMP43]] +// CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK1: cond.true10: +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: br label [[COND_END12:%.*]] +// CHECK1: cond.false11: +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: br label [[COND_END12]] +// CHECK1: cond.end12: +// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP44]], [[COND_TRUE10]] ], [ [[TMP45]], [[COND_FALSE11]] ] +// CHECK1-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: store i32 [[TMP46]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP43]]) -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 -// CHECK1-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP47:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[TMP47]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP48]]) +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CHECK1-NEXT: br i1 [[TMP50]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP46]], 0 -// CHECK1-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 -// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD17]], i32* [[I3]], align 4 +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP51]], 0 +// CHECK1-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD16]], i32* [[I3]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -587,14 +607,14 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], [1000 x i16]* noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -604,88 +624,95 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load [1000 x i16]*, [1000 x i16]** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP16]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK1-NEXT: [[CONV6:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group !22 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp ule i64 [[CONV6]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[CONV5:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !22 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP19]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !22 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !22 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2, !llvm.access.group !22 -// CHECK1-NEXT: [[CONV8:%.*]] = sext i16 [[TMP14]] to i32 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], 1 -// CHECK1-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i16 -// CHECK1-NEXT: store i16 [[CONV10]], i16* [[ARRAYIDX]], align 2, !llvm.access.group !22 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP8]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i16, i16* [[ARRAYIDX]], align 2, !llvm.access.group !22 +// CHECK1-NEXT: [[CONV7:%.*]] = sext i16 [[TMP22]] to i32 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[CONV7]], 1 +// CHECK1-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD8]] to i16 +// CHECK1-NEXT: store i16 [[CONV9]], i16* [[ARRAYIDX]], align 2, !llvm.access.group !22 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]]) -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]]) +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP21]], 0 -// CHECK1-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 -// CHECK1-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] -// CHECK1-NEXT: store i32 [[ADD15]], i32* [[I5]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP29]], 0 +// CHECK1-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1 +// CHECK1-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] +// CHECK1-NEXT: store i32 [[ADD14]], i32* [[I4]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -697,6 +724,7 @@ // CHECK1-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 @@ -706,9 +734,11 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -716,100 +746,112 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_5]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 10 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to i8* -// CHECK1-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8, !llvm.access.group !25 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to i8* -// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8, !llvm.access.group !25 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8, !llvm.access.group !25 -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP17]], i64 3), !llvm.access.group !25 +// CHECK1-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i64 [[TMP12]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !25 +// CHECK1-NEXT: store i64 [[TMP14]], i64* [[TMP13]], align 8, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !25 +// CHECK1-NEXT: store i64 [[TMP16]], i64* [[TMP15]], align 8, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP17]], align 8, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast %struct.anon.5* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK1-NEXT: [[TMP19:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 24), !llvm.access.group !25 +// CHECK1-NEXT: [[TMP20:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP18]], i8* [[TMP19]]), !llvm.access.group !25 +// CHECK1-NEXT: [[TMP21:%.*]] = load [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], align 8, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP20]] to %struct.anon.5* +// CHECK1-NEXT: store [[STRUCT_ANON_5]] [[TMP21]], %struct.anon.5* [[TMP22]], align 8, !llvm.access.group !25 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.5*)* @__omp_outlined__5 to i8*), i8* null, i8* [[TMP20]]), !llvm.access.group !25 +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP19]], i64 24), !llvm.access.group !25 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP29]], 9 // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK1: cond.true5: // CHECK1-NEXT: br label [[COND_END7:%.*]] // CHECK1: cond.false6: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK1-NEXT: br label [[COND_END7]] // CHECK1: cond.end7: -// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] +// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP30]], [[COND_FALSE6]] ] // CHECK1-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK1-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 10, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -818,13 +860,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -834,59 +876,65 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group !28 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP7]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !28 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !28 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !28 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !28 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 10, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -899,7 +947,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 // CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 @@ -911,13 +959,14 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[F_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[F_CASTED]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store [10 x [10 x i32]]* [[TMP0]], [10 x [10 x i32]]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i64 [[TMP4]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -925,12 +974,12 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__6 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 +// CHECK1-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -938,101 +987,109 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_8]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* +// CHECK1-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[F]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP10]], 100 // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !31 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[F_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[CONV3]], align 4, !llvm.access.group !31 -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[F_CASTED]], align 8, !llvm.access.group !31 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to i8* -// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8, !llvm.access.group !31 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to i8* -// CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8, !llvm.access.group !31 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP18:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 8, !llvm.access.group !31 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP12]] to i8* -// CHECK1-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8, !llvm.access.group !31 -// CHECK1-NEXT: [[TMP21:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x [10 x i32]]*, i64)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP21]], i64 4), !llvm.access.group !31 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i64 [[TMP12]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 +// CHECK1-NEXT: store i64 [[TMP14]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !31 +// CHECK1-NEXT: store i64 [[TMP16]], i64* [[TMP15]], align 8, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !31 +// CHECK1-NEXT: store i64 [[TMP18]], i64* [[TMP17]], align 8, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [10 x [10 x i32]]* [[TMP2]], [10 x [10 x i32]]** [[TMP19]], align 8, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[F]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: store i32 [[TMP21]], i32* [[TMP20]], align 8, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP22:%.*]] = bitcast %struct.anon.8* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK1-NEXT: [[TMP23:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 32), !llvm.access.group !31 +// CHECK1-NEXT: [[TMP24:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP22]], i8* [[TMP23]]), !llvm.access.group !31 +// CHECK1-NEXT: [[TMP25:%.*]] = load [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], align 1, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i8* [[TMP24]] to %struct.anon.8* +// CHECK1-NEXT: store [[STRUCT_ANON_8]] [[TMP25]], %struct.anon.8* [[TMP26]], align 1, !llvm.access.group !31 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.8*)* @__omp_outlined__7 to i8*), i8* null, i8* [[TMP24]]), !llvm.access.group !31 +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP23]], i64 32), !llvm.access.group !31 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP28]], 99 -// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] -// CHECK1: cond.true7: -// CHECK1-NEXT: br label [[COND_END9:%.*]] -// CHECK1: cond.false8: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 -// CHECK1-NEXT: br label [[COND_END9]] -// CHECK1: cond.end9: -// CHECK1-NEXT: [[COND10:%.*]] = phi i32 [ 99, [[COND_TRUE7]] ], [ [[TMP29]], [[COND_FALSE8]] ] -// CHECK1-NEXT: store i32 [[COND10]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 -// CHECK1-NEXT: store i32 [[TMP30]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP33]], 99 +// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK1: cond.true6: +// CHECK1-NEXT: br label [[COND_END8:%.*]] +// CHECK1: cond.false7: +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: br label [[COND_END8]] +// CHECK1: cond.end8: +// CHECK1-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP34]], [[COND_FALSE7]] ] +// CHECK1-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: store i32 [[TMP35]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 10, i32* [[I]], align 4 // CHECK1-NEXT: store i32 10, i32* [[J]], align 4 @@ -1042,14 +1099,14 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__7 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1062,79 +1119,86 @@ // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* +// CHECK1-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[F]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK1-NEXT: [[CONV4:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group !34 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV4]], [[TMP7]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[CONV3:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !34 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV3]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 10 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !34 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK1-NEXT: [[DIV5:%.*]] = sdiv i32 [[TMP10]], 10 -// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[DIV5]], 10 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL6]] -// CHECK1-NEXT: [[MUL7:%.*]] = mul nsw i32 [[SUB]], 1 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL7]] -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[J]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP18]], 10 +// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL5]] +// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[J]], align 4, !llvm.access.group !34 // CHECK1-NEXT: store i32 10, i32* [[K]], align 4, !llvm.access.group !34 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !34 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !34 -// CHECK1-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP11]], [[MUL9]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4, !llvm.access.group !34 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD10]], [[TMP14]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !34 -// CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM12]] -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[ARRAYIDX13]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[F]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[MUL8]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[K]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP22]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] +// CHECK1-NEXT: store i32 [[ADD10]], i32* [[ARRAYIDX12]], align 4, !llvm.access.group !34 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 10, i32* [[I]], align 4 // CHECK1-NEXT: store i32 10, i32* [[J]], align 4 @@ -1149,8 +1213,7 @@ // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 // CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 @@ -1162,15 +1225,17 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4:[0-9]+]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[L_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[L_CASTED]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[L_CASTED]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store [1000 x i32]* [[TMP0]], [1000 x i32]** [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] +// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -1178,13 +1243,13 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 noundef [[L:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1194,142 +1259,150 @@ // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK2-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], i32* [[L]], align 4 // CHECK2-NEXT: [[L1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) // CHECK2-NEXT: [[L_ON_STACK:%.*]] = bitcast i8* [[L1]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP18]], [[ADD]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[L_ADDR]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: store i32 [[TMP18]], i32* [[L_CASTED]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[L_CASTED]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP14]] to i8* -// CHECK2-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP15]] to i8* -// CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK2-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP17]] to i8* -// CHECK2-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK2-NEXT: [[TMP27:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 -// CHECK2-NEXT: [[TMP29:%.*]] = inttoptr i32 [[TMP19]] to i8* -// CHECK2-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP32:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP32]], i32 5), !llvm.access.group !12 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: store i32 [[TMP23]], i32* [[TMP22]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store [1000 x i32]* [[TMP4]], [1000 x i32]** [[TMP28]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[L]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: store i32 [[TMP30]], i32* [[TMP29]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP31:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK2-NEXT: [[TMP32:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 20), !llvm.access.group !12 +// CHECK2-NEXT: [[TMP33:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP31]], i8* [[TMP32]]), !llvm.access.group !12 +// CHECK2-NEXT: [[TMP34:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP35:%.*]] = bitcast i8* [[TMP33]] to %struct.anon.0* +// CHECK2-NEXT: store [[STRUCT_ANON_0]] [[TMP34]], %struct.anon.0* [[TMP35]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP36]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP37]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__1 to i8*), i8* null, i8* [[TMP33]]), !llvm.access.group !12 +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[TMP32]], i32 20), !llvm.access.group !12 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] // CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] // CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP44]], [[TMP45]] // CHECK2-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK2: cond.true11: -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !12 // CHECK2-NEXT: br label [[COND_END13:%.*]] // CHECK2: cond.false12: -// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 // CHECK2-NEXT: br label [[COND_END13]] // CHECK2: cond.end13: -// CHECK2-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE11]] ], [ [[TMP42]], [[COND_FALSE12]] ] +// CHECK2-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP46]], [[COND_TRUE11]] ], [ [[TMP47]], [[COND_FALSE12]] ] // CHECK2-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: store i32 [[TMP43]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: store i32 [[TMP48]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP45]]) -// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 -// CHECK2-NEXT: br i1 [[TMP47]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: [[TMP49:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP50:%.*]] = load i32, i32* [[TMP49]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP50]]) +// CHECK2-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP52:%.*]] = icmp ne i32 [[TMP51]], 0 +// CHECK2-NEXT: br i1 [[TMP52]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: -// CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP48]], 0 +// CHECK2-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP53]], 0 // CHECK2-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK2-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD17]], i32* [[I4]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK2: .omp.final.done: -// CHECK2-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 -// CHECK2-NEXT: br i1 [[TMP50]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK2-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP55:%.*]] = icmp ne i32 [[TMP54]], 0 +// CHECK2-NEXT: br i1 [[TMP55]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK2: .omp.lastprivate.then: -// CHECK2-NEXT: [[TMP51:%.*]] = load i32, i32* [[L_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP51]], i32* [[L_ADDR]], align 4 +// CHECK2-NEXT: [[TMP56:%.*]] = load i32, i32* [[L]], align 4 +// CHECK2-NEXT: store i32 [[TMP56]], i32* [[L]], align 4 // CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK2: .omp.lastprivate.done: // CHECK2-NEXT: br label [[OMP_PRECOND_END]] @@ -1339,15 +1412,15 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 noundef [[L:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1360,116 +1433,126 @@ // CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], i32* [[L]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 32) +// CHECK2-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP18]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 32) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !16 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP8]], i32 0, i32 [[TMP29]] // CHECK2-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 -// CHECK2-NEXT: store i32 [[TMP20]], i32* [[L_ADDR]], align 4, !llvm.access.group !16 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 +// CHECK2-NEXT: store i32 [[TMP30]], i32* [[L]], align 4, !llvm.access.group !16 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], 1 // CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: -// CHECK2-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP27]]) -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK2-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP36]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP37]]) +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK2-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP30]], 0 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP40]], 0 // CHECK2-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK2-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] // CHECK2-NEXT: store i32 [[ADD13]], i32* [[I3]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK2: .omp.final.done: -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK2-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK2-NEXT: br i1 [[TMP42]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK2: .omp.lastprivate.then: -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[L_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP33]], i32* [[L_ADDR]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[L]], align 4 +// CHECK2-NEXT: store i32 [[TMP43]], i32* [[L]], align 4 // CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK2: .omp.lastprivate.done: // CHECK2-NEXT: br label [[OMP_PRECOND_END]] @@ -1482,7 +1565,7 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 @@ -1493,12 +1576,14 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store [1000 x i16]* [[TMP0]], [1000 x i16]** [[TMP5]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR3]] +// CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -1506,12 +1591,12 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], [1000 x i16]* noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1521,121 +1606,131 @@ // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_2]], align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK2-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load [1000 x i16]*, [1000 x i16]** [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP16]], [[ADD]] // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to i8* -// CHECK2-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to i8* -// CHECK2-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to i8* -// CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK2-NEXT: [[TMP25:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[TMP28:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP27]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP28]], i32 4), !llvm.access.group !19 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: store i32 [[TMP18]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: store i32 [[TMP21]], i32* [[TMP20]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: store i32 [[TMP23]], i32* [[TMP22]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store [1000 x i16]* [[TMP4]], [1000 x i16]** [[TMP26]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP27:%.*]] = bitcast %struct.anon.2* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK2-NEXT: [[TMP28:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 16), !llvm.access.group !19 +// CHECK2-NEXT: [[TMP29:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP27]], i8* [[TMP28]]), !llvm.access.group !19 +// CHECK2-NEXT: [[TMP30:%.*]] = load [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP31:%.*]] = bitcast i8* [[TMP29]] to %struct.anon.2* +// CHECK2-NEXT: store [[STRUCT_ANON_2]] [[TMP30]], %struct.anon.2* [[TMP31]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP33]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.2*)* @__omp_outlined__3 to i8*), i8* null, i8* [[TMP29]]), !llvm.access.group !19 +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[TMP28]], i32 16), !llvm.access.group !19 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] // CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK2: cond.true10: -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 // CHECK2-NEXT: br label [[COND_END12:%.*]] // CHECK2: cond.false11: -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 // CHECK2-NEXT: br label [[COND_END12]] // CHECK2: cond.end12: -// CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] +// CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE10]] ], [ [[TMP43]], [[COND_FALSE11]] ] // CHECK2-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: store i32 [[TMP39]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: store i32 [[TMP44]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP41]]) -// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 -// CHECK2-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP46]]) +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0 +// CHECK2-NEXT: br i1 [[TMP48]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: -// CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP49]], 0 // CHECK2-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1 // CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]] @@ -1648,14 +1743,14 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], [1000 x i16]* noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1668,52 +1763,60 @@ // CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK2-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load [1000 x i16]*, [1000 x i16]** [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP16]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group !22 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !22 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP18]], [[TMP19]] // CHECK2-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !22 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !22 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK2-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2, !llvm.access.group !22 -// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !22 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP8]], i32 0, i32 [[TMP21]] +// CHECK2-NEXT: [[TMP22:%.*]] = load i16, i16* [[ARRAYIDX]], align 2, !llvm.access.group !22 +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP22]] to i32 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV]], 1 // CHECK2-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 // CHECK2-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX]], align 2, !llvm.access.group !22 @@ -1721,23 +1824,23 @@ // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]]) -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]]) +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK2-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP21]], 0 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP29]], 0 // CHECK2-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK2-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -1753,6 +1856,7 @@ // CHECK2-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 @@ -1762,9 +1866,11 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP3]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR3]] +// CHECK2-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -1772,98 +1878,110 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_4]], align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK2-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 10 // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4, !llvm.access.group !25 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK2-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4, !llvm.access.group !25 -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK2-NEXT: [[TMP14:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4, !llvm.access.group !25 -// CHECK2-NEXT: [[TMP15:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP15]], i32 3), !llvm.access.group !25 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: store i32 [[TMP14]], i32* [[TMP13]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP15]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP16:%.*]] = bitcast %struct.anon.4* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK2-NEXT: [[TMP17:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 12), !llvm.access.group !25 +// CHECK2-NEXT: [[TMP18:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP16]], i8* [[TMP17]]), !llvm.access.group !25 +// CHECK2-NEXT: [[TMP19:%.*]] = load [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP18]] to %struct.anon.4* +// CHECK2-NEXT: store [[STRUCT_ANON_4]] [[TMP19]], %struct.anon.4* [[TMP20]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.4*)* @__omp_outlined__5 to i8*), i8* null, i8* [[TMP18]]), !llvm.access.group !25 +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[TMP17]], i32 12), !llvm.access.group !25 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 -// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP27]], 9 // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK2: cond.true5: // CHECK2-NEXT: br label [[COND_END7:%.*]] // CHECK2: cond.false6: -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK2-NEXT: br label [[COND_END7]] // CHECK2: cond.end7: -// CHECK2-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] +// CHECK2-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP28]], [[COND_FALSE6]] ] // CHECK2-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 -// CHECK2-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: store i32 [[TMP29]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK2-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK2-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 10, i32* [[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1872,13 +1990,13 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1888,55 +2006,61 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK2-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group !28 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !28 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP12]], [[TMP13]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !28 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !28 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 -// CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !28 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP15]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 +// CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK2-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 -// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 +// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK2-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]]) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 10, i32* [[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1949,7 +2073,7 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 // CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 @@ -1960,12 +2084,14 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[F_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP3]], i32* [[F_CASTED]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[F_CASTED]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store [10 x [10 x i32]]* [[TMP0]], [10 x [10 x i32]]** [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[F_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP4]]) #[[ATTR3]] +// CHECK2-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -1973,12 +2099,12 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__6 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 -// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 +// CHECK2-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1986,97 +2112,107 @@ // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_6]], align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK2-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[F]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 99 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP10]], 100 // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[F_ADDR]], align 4, !llvm.access.group !31 -// CHECK2-NEXT: store i32 [[TMP9]], i32* [[F_CASTED]], align 4, !llvm.access.group !31 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[F_CASTED]], align 4, !llvm.access.group !31 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK2-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4, !llvm.access.group !31 -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK2-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4, !llvm.access.group !31 -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK2-NEXT: [[TMP16:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4, !llvm.access.group !31 -// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK2-NEXT: [[TMP18:%.*]] = inttoptr i32 [[TMP10]] to i8* -// CHECK2-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 4, !llvm.access.group !31 -// CHECK2-NEXT: [[TMP19:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x [10 x i32]]*, i32)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP19]], i32 4), !llvm.access.group !31 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: store i32 [[TMP14]], i32* [[TMP13]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: store i32 [[TMP16]], i32* [[TMP15]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store [10 x [10 x i32]]* [[TMP2]], [10 x [10 x i32]]** [[TMP17]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[F]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP20:%.*]] = bitcast %struct.anon.6* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK2-NEXT: [[TMP21:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 16), !llvm.access.group !31 +// CHECK2-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP20]], i8* [[TMP21]]), !llvm.access.group !31 +// CHECK2-NEXT: [[TMP23:%.*]] = load [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP22]] to %struct.anon.6* +// CHECK2-NEXT: store [[STRUCT_ANON_6]] [[TMP23]], %struct.anon.6* [[TMP24]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB4]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.6*)* @__omp_outlined__7 to i8*), i8* null, i8* [[TMP22]]), !llvm.access.group !31 +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[TMP21]], i32 16), !llvm.access.group !31 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 -// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK2-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP31]], 99 // CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK2: cond.true6: // CHECK2-NEXT: br label [[COND_END8:%.*]] // CHECK2: cond.false7: -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 // CHECK2-NEXT: br label [[COND_END8]] // CHECK2: cond.end8: -// CHECK2-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ] +// CHECK2-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP32]], [[COND_FALSE7]] ] // CHECK2-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 -// CHECK2-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK2-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]]) +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK2-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 10, i32* [[I]], align 4 // CHECK2-NEXT: store i32 10, i32* [[J]], align 4 @@ -2086,14 +2222,14 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__7 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 -// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2106,73 +2242,81 @@ // CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK2-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK2-NEXT: store i32 [[TMP8]], i32* [[F]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group !34 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 10 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !34 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK2-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP18]], 10 // CHECK2-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 10 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL3]] +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL3]] // CHECK2-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] // CHECK2-NEXT: store i32 [[ADD5]], i32* [[J]], align 4, !llvm.access.group !34 // CHECK2-NEXT: store i32 10, i32* [[K]], align 4, !llvm.access.group !34 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !34 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[F_ADDR]], align 4, !llvm.access.group !34 -// CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[MUL6]] -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4, !llvm.access.group !34 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP14]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i32 0, i32 [[TMP15]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !34 -// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP16]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[F]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], [[MUL6]] +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[K]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP22]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP6]], i32 0, i32 [[TMP23]] +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP24]] // CHECK2-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX9]], align 4, !llvm.access.group !34 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 -// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK2-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]]) +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK2-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 10, i32* [[I]], align 4 // CHECK2-NEXT: store i32 10, i32* [[J]], align 4 diff --git a/clang/test/OpenMP/nvptx_teams_codegen.cpp b/clang/test/OpenMP/nvptx_teams_codegen.cpp --- a/clang/test/OpenMP/nvptx_teams_codegen.cpp +++ b/clang/test/OpenMP/nvptx_teams_codegen.cpp @@ -80,6 +80,7 @@ // CHECK1-SAME: (i64 noundef [[ARGC:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 @@ -93,9 +94,11 @@ // CHECK1-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[ARGC_ON_STACK]], i32** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR3:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i64 4) // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void @@ -104,16 +107,18 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[TMP0]], align 4 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -121,6 +126,7 @@ // CHECK1-SAME: (i8** noundef [[ARGC:%.*]]) #[[ATTR0]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8 @@ -133,9 +139,11 @@ // CHECK1-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i8*** // CHECK1-NEXT: store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i8*** [[ARGC_ON_STACK]], i8**** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i64 8) // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void @@ -144,16 +152,18 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8*** noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i8** null, i8*** [[TMP0]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8***, i8**** [[TMP1]], align 8 +// CHECK1-NEXT: store i8** null, i8*** [[TMP2]], align 8 // CHECK1-NEXT: ret void // // @@ -161,6 +171,7 @@ // CHECK2-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 @@ -173,9 +184,11 @@ // CHECK2-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store i32* [[ARGC_ON_STACK]], i32** [[TMP3]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR3:[0-9]+]] +// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i32 4) // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void @@ -184,16 +197,18 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[TMP0]], align 4 +// CHECK2-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[TMP2]], align 4 // CHECK2-NEXT: ret void // // @@ -201,6 +216,7 @@ // CHECK2-SAME: (i8** noundef [[ARGC:%.*]]) #[[ATTR0]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 4 @@ -213,9 +229,11 @@ // CHECK2-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i8*** // CHECK2-NEXT: store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store i8*** [[ARGC_ON_STACK]], i8**** [[TMP3]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR3]] +// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i32 4) // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void @@ -224,16 +242,18 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8*** noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: store i8** null, i8*** [[TMP0]], align 4 +// CHECK2-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i8***, i8**** [[TMP1]], align 4 +// CHECK2-NEXT: store i8** null, i8*** [[TMP2]], align 4 // CHECK2-NEXT: ret void // // @@ -243,6 +263,7 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 @@ -260,9 +281,11 @@ // CHECK3-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC3]] to i32* // CHECK3-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[ARGC_ON_STACK]], i32** [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR3:[0-9]+]] +// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[ARGC3]], i64 4) // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: ret void @@ -271,16 +294,18 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: store i32 0, i32* [[TMP0]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[TMP2]], align 4 // CHECK3-NEXT: ret void // // @@ -290,6 +315,7 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 @@ -306,9 +332,11 @@ // CHECK3-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC2]] to i8*** // CHECK3-NEXT: store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 8 // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i8*** [[ARGC_ON_STACK]], i8**** [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR3]] +// CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[ARGC2]], i64 8) // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: ret void @@ -317,16 +345,18 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8*** noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: store i8** null, i8*** [[TMP0]], align 8 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8***, i8**** [[TMP1]], align 8 +// CHECK3-NEXT: store i8** null, i8*** [[TMP2]], align 8 // CHECK3-NEXT: ret void // // @@ -336,6 +366,7 @@ // CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 @@ -350,9 +381,11 @@ // CHECK4-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i32* // CHECK4-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store i32* [[ARGC_ON_STACK]], i32** [[TMP3]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR3:[0-9]+]] +// CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK4-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i32 4) // CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK4-NEXT: ret void @@ -361,16 +394,18 @@ // // // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[TMP0]], align 4 +// CHECK4-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[TMP2]], align 4 // CHECK4-NEXT: ret void // // @@ -380,6 +415,7 @@ // CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 4 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 @@ -394,9 +430,11 @@ // CHECK4-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i8*** // CHECK4-NEXT: store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store i8*** [[ARGC_ON_STACK]], i8**** [[TMP3]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR3]] +// CHECK4-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK4-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i32 4) // CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK4-NEXT: ret void @@ -405,15 +443,17 @@ // // // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8*** noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 4 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: store i8** null, i8*** [[TMP0]], align 4 +// CHECK4-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load i8***, i8**** [[TMP1]], align 4 +// CHECK4-NEXT: store i8** null, i8*** [[TMP2]], align 4 // CHECK4-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp --- a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp +++ b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp @@ -54,6 +54,7 @@ // CHECK1-SAME: (i64 noundef [[E:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[E_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[E]], i64* [[E_ADDR]], align 8 @@ -67,9 +68,11 @@ // CHECK1-NEXT: [[E_ON_STACK:%.*]] = bitcast i8* [[E1]] to double* // CHECK1-NEXT: store double [[TMP1]], double* [[E_ON_STACK]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store double* [[E_ON_STACK]], double** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], double* [[E_ON_STACK]]) #[[ATTR4:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[E1]], i64 8) // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void @@ -78,41 +81,43 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[E_ADDR:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store double* [[E]], double** [[E_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 8 -// CHECK1-NEXT: [[E1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 8) -// CHECK1-NEXT: [[E_ON_STACK:%.*]] = bitcast i8* [[E1]] to double* +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP1]], align 8 +// CHECK1-NEXT: [[E:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[E_ON_STACK:%.*]] = bitcast i8* [[E]] to double* // CHECK1-NEXT: store double 0.000000e+00, double* [[E_ON_STACK]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double, double* [[E_ON_STACK]], align 8 -// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 5.000000e+00 +// CHECK1-NEXT: [[TMP3:%.*]] = load double, double* [[E_ON_STACK]], align 8 +// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[TMP3]], 5.000000e+00 // CHECK1-NEXT: store double [[ADD]], double* [[E_ON_STACK]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast double* [[E_ON_STACK]] to i8* -// CHECK1-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i8* [[TMP7]], i32 1024, i8* [[TMP6]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func) -// CHECK1-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 1 -// CHECK1-NEXT: br i1 [[TMP9]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP7:%.*]] = bitcast double* [[E_ON_STACK]] to i8* +// CHECK1-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP9:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i8* [[TMP9]], i32 1024, i8* [[TMP8]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func) +// CHECK1-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 1 +// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[TMP10:%.*]] = load double, double* [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load double, double* [[E_ON_STACK]], align 8 -// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[TMP10]], [[TMP11]] -// CHECK1-NEXT: store double [[ADD2]], double* [[TMP0]], align 8 -// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP3]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load double, double* [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load double, double* [[E_ON_STACK]], align 8 +// CHECK1-NEXT: [[ADD1:%.*]] = fadd double [[TMP12]], [[TMP13]] +// CHECK1-NEXT: store double [[ADD1]], double* [[TMP2]], align 8 +// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK1: .omp.reduction.done: -// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[E1]], i64 8) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[E]], i64 8) // CHECK1-NEXT: ret void // // @@ -354,6 +359,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[C]], i64* [[C_ADDR]], align 8 @@ -372,9 +378,13 @@ // CHECK1-NEXT: [[D_ON_STACK:%.*]] = bitcast i8* [[D3]] to float* // CHECK1-NEXT: store float [[TMP2]], float* [[D_ON_STACK]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i8* [[C2]], i8** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store float* [[D_ON_STACK]], float** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP3]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8* [[C2]], float* [[D_ON_STACK]]) #[[ATTR4]] +// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4]] // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[D3]], i64 4) // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[C2]], i64 1) // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -384,61 +394,62 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[C:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i8* [[C]], i8** [[C_ADDR]], align 8 -// CHECK1-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[C1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 1) -// CHECK1-NEXT: [[D2:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4) -// CHECK1-NEXT: [[D_ON_STACK:%.*]] = bitcast i8* [[D2]] to float* -// CHECK1-NEXT: store i8 0, i8* [[C1]], align 1 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load float*, float** [[TMP3]], align 8 +// CHECK1-NEXT: [[C:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 1) +// CHECK1-NEXT: [[D:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4) +// CHECK1-NEXT: [[D_ON_STACK:%.*]] = bitcast i8* [[D]] to float* +// CHECK1-NEXT: store i8 0, i8* [[C]], align 1 // CHECK1-NEXT: store float 1.000000e+00, float* [[D_ON_STACK]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[C1]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8, i8* [[C]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP5]] to i32 // CHECK1-NEXT: [[XOR:%.*]] = xor i32 [[CONV]], 2 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i32 [[XOR]] to i8 -// CHECK1-NEXT: store i8 [[CONV3]], i8* [[C1]], align 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load float, float* [[D_ON_STACK]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = fmul float [[TMP3]], 3.300000e+01 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[XOR]] to i8 +// CHECK1-NEXT: store i8 [[CONV1]], i8* [[C]], align 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[D_ON_STACK]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = fmul float [[TMP6]], 3.300000e+01 // CHECK1-NEXT: store float [[MUL]], float* [[D_ON_STACK]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store i8* [[C1]], i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast float* [[D_ON_STACK]] to i8* -// CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i8* [[TMP10]], i32 1024, i8* [[TMP9]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func3, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func4, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func5, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func6, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func7, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func8) -// CHECK1-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP11]], 1 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store i8* [[C]], i8** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast float* [[D_ON_STACK]] to i8* +// CHECK1-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP13:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i8* [[TMP13]], i32 1024, i8* [[TMP12]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func3, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func4, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func5, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func6, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func7, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func8) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 1 +// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP0]], align 1 -// CHECK1-NEXT: [[CONV4:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK1-NEXT: [[TMP14:%.*]] = load i8, i8* [[C1]], align 1 -// CHECK1-NEXT: [[CONV5:%.*]] = sext i8 [[TMP14]] to i32 -// CHECK1-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]] -// CHECK1-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8 -// CHECK1-NEXT: store i8 [[CONV7]], i8* [[TMP0]], align 1 -// CHECK1-NEXT: [[TMP15:%.*]] = load float, float* [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load float, float* [[D_ON_STACK]], align 4 -// CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP16]] -// CHECK1-NEXT: store float [[MUL8]], float* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i8, i8* [[TMP2]], align 1 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP16]] to i32 +// CHECK1-NEXT: [[TMP17:%.*]] = load i8, i8* [[C]], align 1 +// CHECK1-NEXT: [[CONV3:%.*]] = sext i8 [[TMP17]] to i32 +// CHECK1-NEXT: [[XOR4:%.*]] = xor i32 [[CONV2]], [[CONV3]] +// CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[XOR4]] to i8 +// CHECK1-NEXT: store i8 [[CONV5]], i8* [[TMP2]], align 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load float, float* [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load float, float* [[D_ON_STACK]], align 4 +// CHECK1-NEXT: [[MUL6:%.*]] = fmul float [[TMP18]], [[TMP19]] +// CHECK1-NEXT: store float [[MUL6]], float* [[TMP4]], align 4 +// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP8]]) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK1: .omp.reduction.done: -// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[D2]], i64 4) -// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[C1]], i64 1) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[D]], i64 4) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[C]], i64 1) // CHECK1-NEXT: ret void // // @@ -627,18 +638,18 @@ // CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* // CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.0* +// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.1* // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 0 // CHECK1-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 8 -// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 0 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP7]] // CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP9]], align 1 // CHECK1-NEXT: store i8 [[TMP11]], i8* [[TMP10]], align 128 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 1 // CHECK1-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 8 // CHECK1-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to float* -// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 1 +// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 1 // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x float], [1024 x float]* [[D]], i32 0, i32 [[TMP7]] // CHECK1-NEXT: [[TMP16:%.*]] = load float, float* [[TMP14]], align 4 // CHECK1-NEXT: store float [[TMP16]], float* [[TMP15]], align 128 @@ -656,14 +667,14 @@ // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK1-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.0* +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.1* // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP5]] // CHECK1-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 1 +// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 1 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x float], [1024 x float]* [[D]], i32 0, i32 [[TMP5]] // CHECK1-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to i8* // CHECK1-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 8 @@ -685,18 +696,18 @@ // CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* // CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.0* +// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.1* // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 0 // CHECK1-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 8 -// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 0 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP7]] // CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP10]], align 128 // CHECK1-NEXT: store i8 [[TMP11]], i8* [[TMP9]], align 1 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 1 // CHECK1-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 8 // CHECK1-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to float* -// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 1 +// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 1 // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x float], [1024 x float]* [[D]], i32 0, i32 [[TMP7]] // CHECK1-NEXT: [[TMP16:%.*]] = load float, float* [[TMP15]], align 128 // CHECK1-NEXT: store float [[TMP16]], float* [[TMP14]], align 4 @@ -714,14 +725,14 @@ // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK1-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.0* +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.1* // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP5]] // CHECK1-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 1 +// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 1 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x float], [1024 x float]* [[D]], i32 0, i32 [[TMP5]] // CHECK1-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to i8* // CHECK1-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 8 @@ -736,6 +747,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 @@ -747,9 +759,13 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i16* [[CONV1]], i16** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[CONV]], i16* [[CONV1]]) #[[ATTR4]] +// CHECK1-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -757,139 +773,146 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__9 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 8 -// CHECK1-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[B2:%.*]] = alloca i16, align 2 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_3]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[A1]], align 4 -// CHECK1-NEXT: store i16 -32768, i16* [[B2]], align 2 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i32* [[A1]] to i8* -// CHECK1-NEXT: store i8* [[TMP3]], i8** [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i16* [[B2]] to i8* -// CHECK1-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*)* @__omp_outlined__10 to i8*), i8* null, i8** [[TMP8]], i64 2) -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i32* [[A1]] to i8* -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i16* [[B2]] to i8* -// CHECK1-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i8* [[TMP14]], i32 1024, i8* [[TMP13]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func15, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func16, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func17, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func18, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func19, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func20) -// CHECK1-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 1 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK1-NEXT: store i16 -32768, i16* [[B]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[A]], i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i16* [[B]], i16** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = bitcast %struct.anon.3* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK1-NEXT: [[TMP8:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16) +// CHECK1-NEXT: [[TMP9:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP7]], i8* [[TMP8]]) +// CHECK1-NEXT: [[TMP10:%.*]] = load [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP9]] to %struct.anon.3* +// CHECK1-NEXT: store [[STRUCT_ANON_3]] [[TMP10]], %struct.anon.3* [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.3*)* @__omp_outlined__10 to i8*), i8* null, i8* [[TMP9]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP8]], i64 16) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i32* [[A]] to i8* +// CHECK1-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i16* [[B]] to i8* +// CHECK1-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], i8* [[TMP19]], i32 1024, i8* [[TMP18]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func15, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func16, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func17, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func18, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func19, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func20) +// CHECK1-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP20]], 1 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK1-NEXT: [[OR:%.*]] = or i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: store i32 [[OR]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP19]] to i32 -// CHECK1-NEXT: [[TMP20:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP20]] to i32 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV3]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[OR:%.*]] = or i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: store i32 [[OR]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i16, i16* [[TMP4]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP24]] to i32 +// CHECK1-NEXT: [[TMP25:%.*]] = load i16, i16* [[B]], align 2 +// CHECK1-NEXT: [[CONV1:%.*]] = sext i16 [[TMP25]] to i32 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV1]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK1-NEXT: [[TMP26:%.*]] = load i16, i16* [[TMP4]], align 2 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP22:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK1-NEXT: [[TMP27:%.*]] = load i16, i16* [[B]], align 2 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i16 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i16 [[COND]], i16* [[TMP1]], align 2 -// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP7]]) +// CHECK1-NEXT: [[COND:%.*]] = phi i16 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i16 [[COND]], i16* [[TMP4]], align 2 +// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP13]]) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK1: .omp.reduction.done: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__10 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 8 -// CHECK1-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[B2:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[A1]], align 4 -// CHECK1-NEXT: store i16 -32768, i16* [[B2]], align 2 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK1-NEXT: [[OR:%.*]] = or i32 [[TMP2]], 1 -// CHECK1-NEXT: store i32 [[OR]], i32* [[A1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK1-NEXT: store i16 -32768, i16* [[B]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[OR:%.*]] = or i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 [[OR]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, i16* [[B]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 // CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 99, [[CONV]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i16, i16* [[B]], align 2 +// CHECK1-NEXT: [[CONV1:%.*]] = sext i16 [[TMP7]] to i32 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], i16* [[B2]], align 2 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i32* [[A1]] to i8* -// CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i16* [[B2]] to i8* -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 2, i64 16, i8* [[TMP11]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func12, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func13) -// CHECK1-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 -// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV1]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[COND]] to i16 +// CHECK1-NEXT: store i16 [[CONV2]], i16* [[B]], align 2 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast i32* [[A]] to i8* +// CHECK1-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast i16* [[B]] to i8* +// CHECK1-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], i32 2, i64 16, i8* [[TMP14]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func12, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func13) +// CHECK1-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 1 +// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK1-NEXT: [[OR5:%.*]] = or i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: store i32 [[OR5]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK1-NEXT: [[CONV6:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK1-NEXT: [[TMP17:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK1-NEXT: [[CONV7:%.*]] = sext i16 [[TMP17]] to i32 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] -// CHECK1: cond.true9: -// CHECK1-NEXT: [[TMP18:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK1-NEXT: br label [[COND_END11:%.*]] -// CHECK1: cond.false10: -// CHECK1-NEXT: [[TMP19:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK1-NEXT: br label [[COND_END11]] -// CHECK1: cond.end11: -// CHECK1-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE9]] ], [ [[TMP19]], [[COND_FALSE10]] ] -// CHECK1-NEXT: store i16 [[COND12]], i16* [[TMP1]], align 2 -// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[OR3:%.*]] = or i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: store i32 [[OR3]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i16, i16* [[TMP4]], align 2 +// CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP19]] to i32 +// CHECK1-NEXT: [[TMP20:%.*]] = load i16, i16* [[B]], align 2 +// CHECK1-NEXT: [[CONV5:%.*]] = sext i16 [[TMP20]] to i32 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[CONV4]], [[CONV5]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] +// CHECK1: cond.true7: +// CHECK1-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP4]], align 2 +// CHECK1-NEXT: br label [[COND_END9:%.*]] +// CHECK1: cond.false8: +// CHECK1-NEXT: [[TMP22:%.*]] = load i16, i16* [[B]], align 2 +// CHECK1-NEXT: br label [[COND_END9]] +// CHECK1: cond.end9: +// CHECK1-NEXT: [[COND10:%.*]] = phi i16 [ [[TMP21]], [[COND_TRUE7]] ], [ [[TMP22]], [[COND_FALSE8]] ] +// CHECK1-NEXT: store i16 [[COND10]], i16* [[TMP4]], align 2 +// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP9]]) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK1: .omp.reduction.done: // CHECK1-NEXT: ret void @@ -1263,19 +1286,19 @@ // CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* // CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.1* +// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.4* // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 0 // CHECK1-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 8 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 0 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* [[A]], i32 0, i32 [[TMP7]] // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 // CHECK1-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 128 // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 1 // CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 8 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i16* -// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 1 +// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 1 // CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 [[TMP7]] // CHECK1-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP15]], align 2 // CHECK1-NEXT: store i16 [[TMP17]], i16* [[TMP16]], align 128 @@ -1293,15 +1316,15 @@ // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK1-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.1* +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.4* // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* [[A]], i32 0, i32 [[TMP5]] // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8* // CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 1 +// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 [[TMP5]] // CHECK1-NEXT: [[TMP11:%.*]] = bitcast i16* [[TMP10]] to i8* // CHECK1-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 8 @@ -1323,19 +1346,19 @@ // CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* // CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.1* +// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.4* // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 0 // CHECK1-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 8 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 0 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* [[A]], i32 0, i32 [[TMP7]] // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 128 // CHECK1-NEXT: store i32 [[TMP12]], i32* [[TMP10]], align 4 // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 1 // CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 8 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i16* -// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 1 +// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 1 // CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 [[TMP7]] // CHECK1-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP16]], align 128 // CHECK1-NEXT: store i16 [[TMP17]], i16* [[TMP15]], align 2 @@ -1353,15 +1376,15 @@ // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK1-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.1* +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.4* // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* [[A]], i32 0, i32 [[TMP5]] // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8* // CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 1 +// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 [[TMP5]] // CHECK1-NEXT: [[TMP11:%.*]] = bitcast i16* [[TMP10]] to i8* // CHECK1-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 8 @@ -1376,6 +1399,7 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[E_ADDR:%.*]] = alloca double*, align 4 // CHECK2-NEXT: [[E1:%.*]] = alloca double, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 @@ -1387,9 +1411,11 @@ // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: [[TMP3:%.*]] = load double, double* [[TMP0]], align 8 // CHECK2-NEXT: store double [[TMP3]], double* [[E1]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store double* [[E1]], double** [[TMP4]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], double* [[E1]]) #[[ATTR4:[0-9]+]] +// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -1397,41 +1423,43 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[E_ADDR:%.*]] = alloca double*, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 -// CHECK2-NEXT: [[E1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) -// CHECK2-NEXT: [[E_ON_STACK:%.*]] = bitcast i8* [[E1]] to double* +// CHECK2-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP1]], align 4 +// CHECK2-NEXT: [[E:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK2-NEXT: [[E_ON_STACK:%.*]] = bitcast i8* [[E]] to double* // CHECK2-NEXT: store double 0.000000e+00, double* [[E_ON_STACK]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load double, double* [[E_ON_STACK]], align 8 -// CHECK2-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 5.000000e+00 +// CHECK2-NEXT: [[TMP3:%.*]] = load double, double* [[E_ON_STACK]], align 8 +// CHECK2-NEXT: [[ADD:%.*]] = fadd double [[TMP3]], 5.000000e+00 // CHECK2-NEXT: store double [[ADD]], double* [[E_ON_STACK]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP5:%.*]] = bitcast double* [[E_ON_STACK]] to i8* -// CHECK2-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i8* [[TMP7]], i32 1024, i8* [[TMP6]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func) -// CHECK2-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 1 -// CHECK2-NEXT: br i1 [[TMP9]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP7:%.*]] = bitcast double* [[E_ON_STACK]] to i8* +// CHECK2-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK2-NEXT: [[TMP9:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i8* [[TMP9]], i32 1024, i8* [[TMP8]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func) +// CHECK2-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 1 +// CHECK2-NEXT: br i1 [[TMP11]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK2: .omp.reduction.then: -// CHECK2-NEXT: [[TMP10:%.*]] = load double, double* [[TMP0]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = load double, double* [[E_ON_STACK]], align 8 -// CHECK2-NEXT: [[ADD2:%.*]] = fadd double [[TMP10]], [[TMP11]] -// CHECK2-NEXT: store double [[ADD2]], double* [[TMP0]], align 8 -// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP3]]) +// CHECK2-NEXT: [[TMP12:%.*]] = load double, double* [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = load double, double* [[E_ON_STACK]], align 8 +// CHECK2-NEXT: [[ADD1:%.*]] = fadd double [[TMP12]], [[TMP13]] +// CHECK2-NEXT: store double [[ADD1]], double* [[TMP2]], align 8 +// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK2: .omp.reduction.done: -// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[E1]], i32 8) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[E]], i32 8) // CHECK2-NEXT: ret void // // @@ -1673,6 +1701,7 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[C]], i32* [[C_ADDR]], align 4 @@ -1691,9 +1720,13 @@ // CHECK2-NEXT: [[D_ON_STACK:%.*]] = bitcast i8* [[D3]] to float* // CHECK2-NEXT: store float [[TMP2]], float* [[D_ON_STACK]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store i8* [[C2]], i8** [[TMP4]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store float* [[D_ON_STACK]], float** [[TMP5]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP3]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8* [[C2]], float* [[D_ON_STACK]]) #[[ATTR4]] +// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4]] // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[D3]], i32 4) // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[C2]], i32 1) // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -1703,61 +1736,62 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[C:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca i8*, align 4 -// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i8* [[C]], i8** [[C_ADDR]], align 4 -// CHECK2-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 4 -// CHECK2-NEXT: [[C1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) -// CHECK2-NEXT: [[D2:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) -// CHECK2-NEXT: [[D_ON_STACK:%.*]] = bitcast i8* [[D2]] to float* -// CHECK2-NEXT: store i8 0, i8* [[C1]], align 1 +// CHECK2-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load float*, float** [[TMP3]], align 4 +// CHECK2-NEXT: [[C:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK2-NEXT: [[D:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[D_ON_STACK:%.*]] = bitcast i8* [[D]] to float* +// CHECK2-NEXT: store i8 0, i8* [[C]], align 1 // CHECK2-NEXT: store float 1.000000e+00, float* [[D_ON_STACK]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i8, i8* [[C1]], align 1 -// CHECK2-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32 +// CHECK2-NEXT: [[TMP5:%.*]] = load i8, i8* [[C]], align 1 +// CHECK2-NEXT: [[CONV:%.*]] = sext i8 [[TMP5]] to i32 // CHECK2-NEXT: [[XOR:%.*]] = xor i32 [[CONV]], 2 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i32 [[XOR]] to i8 -// CHECK2-NEXT: store i8 [[CONV3]], i8* [[C1]], align 1 -// CHECK2-NEXT: [[TMP3:%.*]] = load float, float* [[D_ON_STACK]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = fmul float [[TMP3]], 3.300000e+01 +// CHECK2-NEXT: [[CONV1:%.*]] = trunc i32 [[XOR]] to i8 +// CHECK2-NEXT: store i8 [[CONV1]], i8* [[C]], align 1 +// CHECK2-NEXT: [[TMP6:%.*]] = load float, float* [[D_ON_STACK]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = fmul float [[TMP6]], 3.300000e+01 // CHECK2-NEXT: store float [[MUL]], float* [[D_ON_STACK]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: store i8* [[C1]], i8** [[TMP6]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP8:%.*]] = bitcast float* [[D_ON_STACK]] to i8* -// CHECK2-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i8* [[TMP10]], i32 1024, i8* [[TMP9]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func3, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func4, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func5, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func6, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func7, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func8) -// CHECK2-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP11]], 1 -// CHECK2-NEXT: br i1 [[TMP12]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: store i8* [[C]], i8** [[TMP9]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP11:%.*]] = bitcast float* [[D_ON_STACK]] to i8* +// CHECK2-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK2-NEXT: [[TMP13:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i8* [[TMP13]], i32 1024, i8* [[TMP12]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func3, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func4, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func5, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func6, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func7, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func8) +// CHECK2-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 1 +// CHECK2-NEXT: br i1 [[TMP15]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK2: .omp.reduction.then: -// CHECK2-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP0]], align 1 -// CHECK2-NEXT: [[CONV4:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK2-NEXT: [[TMP14:%.*]] = load i8, i8* [[C1]], align 1 -// CHECK2-NEXT: [[CONV5:%.*]] = sext i8 [[TMP14]] to i32 -// CHECK2-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]] -// CHECK2-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8 -// CHECK2-NEXT: store i8 [[CONV7]], i8* [[TMP0]], align 1 -// CHECK2-NEXT: [[TMP15:%.*]] = load float, float* [[TMP1]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load float, float* [[D_ON_STACK]], align 4 -// CHECK2-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP16]] -// CHECK2-NEXT: store float [[MUL8]], float* [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) +// CHECK2-NEXT: [[TMP16:%.*]] = load i8, i8* [[TMP2]], align 1 +// CHECK2-NEXT: [[CONV2:%.*]] = sext i8 [[TMP16]] to i32 +// CHECK2-NEXT: [[TMP17:%.*]] = load i8, i8* [[C]], align 1 +// CHECK2-NEXT: [[CONV3:%.*]] = sext i8 [[TMP17]] to i32 +// CHECK2-NEXT: [[XOR4:%.*]] = xor i32 [[CONV2]], [[CONV3]] +// CHECK2-NEXT: [[CONV5:%.*]] = trunc i32 [[XOR4]] to i8 +// CHECK2-NEXT: store i8 [[CONV5]], i8* [[TMP2]], align 1 +// CHECK2-NEXT: [[TMP18:%.*]] = load float, float* [[TMP4]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load float, float* [[D_ON_STACK]], align 4 +// CHECK2-NEXT: [[MUL6:%.*]] = fmul float [[TMP18]], [[TMP19]] +// CHECK2-NEXT: store float [[MUL6]], float* [[TMP4]], align 4 +// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP8]]) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK2: .omp.reduction.done: -// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[D2]], i32 4) -// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[C1]], i32 1) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[D]], i32 4) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[C]], i32 1) // CHECK2-NEXT: ret void // // @@ -1946,18 +1980,18 @@ // CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* // CHECK2-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.0* +// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.1* // CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 // CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 // CHECK2-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 -// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 0 // CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP7]] // CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP9]], align 1 // CHECK2-NEXT: store i8 [[TMP11]], i8* [[TMP10]], align 128 // CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 // CHECK2-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 4 // CHECK2-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to float* -// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 1 +// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 1 // CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x float], [1024 x float]* [[D]], i32 0, i32 [[TMP7]] // CHECK2-NEXT: [[TMP16:%.*]] = load float, float* [[TMP14]], align 4 // CHECK2-NEXT: store float [[TMP16]], float* [[TMP15]], align 128 @@ -1975,14 +2009,14 @@ // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK2-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.0* +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.1* // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 0 // CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP5]] // CHECK2-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 // CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 1 // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x float], [1024 x float]* [[D]], i32 0, i32 [[TMP5]] // CHECK2-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to i8* // CHECK2-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 4 @@ -2004,18 +2038,18 @@ // CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* // CHECK2-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.0* +// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.1* // CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 // CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 // CHECK2-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 -// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 0 // CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP7]] // CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP10]], align 128 // CHECK2-NEXT: store i8 [[TMP11]], i8* [[TMP9]], align 1 // CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 // CHECK2-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 4 // CHECK2-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to float* -// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 1 +// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 1 // CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x float], [1024 x float]* [[D]], i32 0, i32 [[TMP7]] // CHECK2-NEXT: [[TMP16:%.*]] = load float, float* [[TMP15]], align 128 // CHECK2-NEXT: store float [[TMP16]], float* [[TMP14]], align 4 @@ -2033,14 +2067,14 @@ // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK2-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.0* +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.1* // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 0 // CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP5]] // CHECK2-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 // CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 1 // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x float], [1024 x float]* [[D]], i32 0, i32 [[TMP5]] // CHECK2-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to i8* // CHECK2-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 4 @@ -2055,6 +2089,7 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 @@ -2065,9 +2100,13 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store i32* [[A_ADDR]], i32** [[TMP2]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store i16* [[CONV]], i16** [[TMP3]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[A_ADDR]], i16* [[CONV]]) #[[ATTR4]] +// CHECK2-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -2075,139 +2114,146 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__9 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 4 -// CHECK2-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[B2:%.*]] = alloca i16, align 2 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[B:%.*]] = alloca i16, align 2 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_3]], align 8 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK2-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[A1]], align 4 -// CHECK2-NEXT: store i16 -32768, i16* [[B2]], align 2 -// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i32* [[A1]] to i8* -// CHECK2-NEXT: store i8* [[TMP3]], i8** [[TMP2]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i16* [[B2]] to i8* -// CHECK2-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*)* @__omp_outlined__10 to i8*), i8* null, i8** [[TMP8]], i32 2) -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP10:%.*]] = bitcast i32* [[A1]] to i8* -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP12:%.*]] = bitcast i16* [[B2]] to i8* -// CHECK2-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i8* [[TMP14]], i32 1024, i8* [[TMP13]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func15, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func16, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func17, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func18, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func19, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func20) -// CHECK2-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 1 -// CHECK2-NEXT: br i1 [[TMP16]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK2-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK2-NEXT: store i16 -32768, i16* [[B]], align 2 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store i32* [[A]], i32** [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store i16* [[B]], i16** [[TMP6]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = bitcast %struct.anon.3* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK2-NEXT: [[TMP8:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK2-NEXT: [[TMP9:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP7]], i8* [[TMP8]]) +// CHECK2-NEXT: [[TMP10:%.*]] = load [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP9]] to %struct.anon.3* +// CHECK2-NEXT: store [[STRUCT_ANON_3]] [[TMP10]], %struct.anon.3* [[TMP11]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.3*)* @__omp_outlined__10 to i8*), i8* null, i8* [[TMP9]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[TMP8]], i32 8) +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP15:%.*]] = bitcast i32* [[A]] to i8* +// CHECK2-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP17:%.*]] = bitcast i16* [[B]] to i8* +// CHECK2-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK2-NEXT: [[TMP19:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], i8* [[TMP19]], i32 1024, i8* [[TMP18]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func15, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func16, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func17, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func18, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func19, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func20) +// CHECK2-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP20]], 1 +// CHECK2-NEXT: br i1 [[TMP21]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK2: .omp.reduction.then: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK2-NEXT: [[OR:%.*]] = or i32 [[TMP17]], [[TMP18]] -// CHECK2-NEXT: store i32 [[OR]], i32* [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP19]] to i32 -// CHECK2-NEXT: [[TMP20:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK2-NEXT: [[CONV3:%.*]] = sext i16 [[TMP20]] to i32 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV3]] +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-NEXT: [[OR:%.*]] = or i32 [[TMP22]], [[TMP23]] +// CHECK2-NEXT: store i32 [[OR]], i32* [[TMP2]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i16, i16* [[TMP4]], align 2 +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP24]] to i32 +// CHECK2-NEXT: [[TMP25:%.*]] = load i16, i16* [[B]], align 2 +// CHECK2-NEXT: [[CONV1:%.*]] = sext i16 [[TMP25]] to i32 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV1]] // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK2-NEXT: [[TMP26:%.*]] = load i16, i16* [[TMP4]], align 2 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP22:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK2-NEXT: [[TMP27:%.*]] = load i16, i16* [[B]], align 2 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i16 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] -// CHECK2-NEXT: store i16 [[COND]], i16* [[TMP1]], align 2 -// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP7]]) +// CHECK2-NEXT: [[COND:%.*]] = phi i16 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i16 [[COND]], i16* [[TMP4]], align 2 +// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP13]]) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK2: .omp.reduction.done: // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__10 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 4 -// CHECK2-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[B2:%.*]] = alloca i16, align 2 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK2-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[A1]], align 4 -// CHECK2-NEXT: store i16 -32768, i16* [[B2]], align 2 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK2-NEXT: [[OR:%.*]] = or i32 [[TMP2]], 1 -// CHECK2-NEXT: store i32 [[OR]], i32* [[A1]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK2-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK2-NEXT: store i16 -32768, i16* [[B]], align 2 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-NEXT: [[OR:%.*]] = or i32 [[TMP5]], 1 +// CHECK2-NEXT: store i32 [[OR]], i32* [[A]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i16, i16* [[B]], align 2 +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 // CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 99, [[CONV]] // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP4:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK2-NEXT: [[CONV3:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK2-NEXT: [[TMP7:%.*]] = load i16, i16* [[B]], align 2 +// CHECK2-NEXT: [[CONV1:%.*]] = sext i16 [[TMP7]] to i32 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16 -// CHECK2-NEXT: store i16 [[CONV4]], i16* [[B2]], align 2 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i32* [[A1]] to i8* -// CHECK2-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP10:%.*]] = bitcast i16* [[B2]] to i8* -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK2-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 2, i32 8, i8* [[TMP11]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func12, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func13) -// CHECK2-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 -// CHECK2-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV1]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[COND]] to i16 +// CHECK2-NEXT: store i16 [[CONV2]], i16* [[B]], align 2 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP11:%.*]] = bitcast i32* [[A]] to i8* +// CHECK2-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP13:%.*]] = bitcast i16* [[B]] to i8* +// CHECK2-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK2-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], i32 2, i32 8, i8* [[TMP14]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func12, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func13) +// CHECK2-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 1 +// CHECK2-NEXT: br i1 [[TMP16]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK2: .omp.reduction.then: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK2-NEXT: [[OR5:%.*]] = or i32 [[TMP14]], [[TMP15]] -// CHECK2-NEXT: store i32 [[OR5]], i32* [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK2-NEXT: [[CONV6:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK2-NEXT: [[TMP17:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK2-NEXT: [[CONV7:%.*]] = sext i16 [[TMP17]] to i32 -// CHECK2-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]] -// CHECK2-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] -// CHECK2: cond.true9: -// CHECK2-NEXT: [[TMP18:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK2-NEXT: br label [[COND_END11:%.*]] -// CHECK2: cond.false10: -// CHECK2-NEXT: [[TMP19:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK2-NEXT: br label [[COND_END11]] -// CHECK2: cond.end11: -// CHECK2-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE9]] ], [ [[TMP19]], [[COND_FALSE10]] ] -// CHECK2-NEXT: store i16 [[COND12]], i16* [[TMP1]], align 2 -// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP6]]) +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-NEXT: [[OR3:%.*]] = or i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: store i32 [[OR3]], i32* [[TMP2]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i16, i16* [[TMP4]], align 2 +// CHECK2-NEXT: [[CONV4:%.*]] = sext i16 [[TMP19]] to i32 +// CHECK2-NEXT: [[TMP20:%.*]] = load i16, i16* [[B]], align 2 +// CHECK2-NEXT: [[CONV5:%.*]] = sext i16 [[TMP20]] to i32 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[CONV4]], [[CONV5]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] +// CHECK2: cond.true7: +// CHECK2-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP4]], align 2 +// CHECK2-NEXT: br label [[COND_END9:%.*]] +// CHECK2: cond.false8: +// CHECK2-NEXT: [[TMP22:%.*]] = load i16, i16* [[B]], align 2 +// CHECK2-NEXT: br label [[COND_END9]] +// CHECK2: cond.end9: +// CHECK2-NEXT: [[COND10:%.*]] = phi i16 [ [[TMP21]], [[COND_TRUE7]] ], [ [[TMP22]], [[COND_FALSE8]] ] +// CHECK2-NEXT: store i16 [[COND10]], i16* [[TMP4]], align 2 +// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP9]]) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK2: .omp.reduction.done: // CHECK2-NEXT: ret void @@ -2581,19 +2627,19 @@ // CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* // CHECK2-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.1* +// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.4* // CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 // CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 // CHECK2-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 0 // CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* [[A]], i32 0, i32 [[TMP7]] // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 // CHECK2-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 128 // CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 // CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 4 // CHECK2-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i16* -// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 1 +// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 1 // CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 [[TMP7]] // CHECK2-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP15]], align 2 // CHECK2-NEXT: store i16 [[TMP17]], i16* [[TMP16]], align 128 @@ -2611,15 +2657,15 @@ // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK2-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.1* +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.4* // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 0 // CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* [[A]], i32 0, i32 [[TMP5]] // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8* // CHECK2-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 1 // CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 [[TMP5]] // CHECK2-NEXT: [[TMP11:%.*]] = bitcast i16* [[TMP10]] to i8* // CHECK2-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 4 @@ -2641,19 +2687,19 @@ // CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* // CHECK2-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.1* +// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.4* // CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 // CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 // CHECK2-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 0 // CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* [[A]], i32 0, i32 [[TMP7]] // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 128 // CHECK2-NEXT: store i32 [[TMP12]], i32* [[TMP10]], align 4 // CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 // CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 4 // CHECK2-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i16* -// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 1 +// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 1 // CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 [[TMP7]] // CHECK2-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP16]], align 128 // CHECK2-NEXT: store i16 [[TMP17]], i16* [[TMP15]], align 2 @@ -2671,15 +2717,15 @@ // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK2-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.1* +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.4* // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 0 // CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* [[A]], i32 0, i32 [[TMP5]] // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8* // CHECK2-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 1 // CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 [[TMP5]] // CHECK2-NEXT: [[TMP11:%.*]] = bitcast i16* [[TMP10]] to i8* // CHECK2-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 4 @@ -2694,6 +2740,7 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[E_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[E1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 @@ -2705,9 +2752,11 @@ // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: [[TMP3:%.*]] = load double, double* [[TMP0]], align 8 // CHECK3-NEXT: store double [[TMP3]], double* [[E1]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[E1]], double** [[TMP4]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], double* [[E1]]) #[[ATTR4:[0-9]+]] +// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -2715,41 +2764,43 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[E_ADDR:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 -// CHECK3-NEXT: [[E1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) -// CHECK3-NEXT: [[E_ON_STACK:%.*]] = bitcast i8* [[E1]] to double* +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP1]], align 4 +// CHECK3-NEXT: [[E:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK3-NEXT: [[E_ON_STACK:%.*]] = bitcast i8* [[E]] to double* // CHECK3-NEXT: store double 0.000000e+00, double* [[E_ON_STACK]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load double, double* [[E_ON_STACK]], align 8 -// CHECK3-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 5.000000e+00 +// CHECK3-NEXT: [[TMP3:%.*]] = load double, double* [[E_ON_STACK]], align 8 +// CHECK3-NEXT: [[ADD:%.*]] = fadd double [[TMP3]], 5.000000e+00 // CHECK3-NEXT: store double [[ADD]], double* [[E_ON_STACK]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast double* [[E_ON_STACK]] to i8* -// CHECK3-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i8* [[TMP7]], i32 2048, i8* [[TMP6]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func) -// CHECK3-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 1 -// CHECK3-NEXT: br i1 [[TMP9]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = bitcast double* [[E_ON_STACK]] to i8* +// CHECK3-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP9:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i8* [[TMP9]], i32 2048, i8* [[TMP8]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func) +// CHECK3-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 1 +// CHECK3-NEXT: br i1 [[TMP11]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK3: .omp.reduction.then: -// CHECK3-NEXT: [[TMP10:%.*]] = load double, double* [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = load double, double* [[E_ON_STACK]], align 8 -// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[TMP10]], [[TMP11]] -// CHECK3-NEXT: store double [[ADD2]], double* [[TMP0]], align 8 -// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP3]]) +// CHECK3-NEXT: [[TMP12:%.*]] = load double, double* [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = load double, double* [[E_ON_STACK]], align 8 +// CHECK3-NEXT: [[ADD1:%.*]] = fadd double [[TMP12]], [[TMP13]] +// CHECK3-NEXT: store double [[ADD1]], double* [[TMP2]], align 8 +// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK3: .omp.reduction.done: -// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[E1]], i32 8) +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[E]], i32 8) // CHECK3-NEXT: ret void // // @@ -2991,6 +3042,7 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[C]], i32* [[C_ADDR]], align 4 @@ -3009,9 +3061,13 @@ // CHECK3-NEXT: [[D_ON_STACK:%.*]] = bitcast i8* [[D3]] to float* // CHECK3-NEXT: store float [[TMP2]], float* [[D_ON_STACK]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i8* [[C2]], i8** [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store float* [[D_ON_STACK]], float** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8* [[C2]], float* [[D_ON_STACK]]) #[[ATTR4]] +// CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4]] // CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[D3]], i32 4) // CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[C2]], i32 1) // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -3021,61 +3077,62 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[C:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i8* [[C]], i8** [[C_ADDR]], align 4 -// CHECK3-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 4 -// CHECK3-NEXT: [[C1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) -// CHECK3-NEXT: [[D2:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) -// CHECK3-NEXT: [[D_ON_STACK:%.*]] = bitcast i8* [[D2]] to float* -// CHECK3-NEXT: store i8 0, i8* [[C1]], align 1 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8*, i8** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load float*, float** [[TMP3]], align 4 +// CHECK3-NEXT: [[C:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK3-NEXT: [[D:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) +// CHECK3-NEXT: [[D_ON_STACK:%.*]] = bitcast i8* [[D]] to float* +// CHECK3-NEXT: store i8 0, i8* [[C]], align 1 // CHECK3-NEXT: store float 1.000000e+00, float* [[D_ON_STACK]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[C1]], align 1 -// CHECK3-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[C]], align 1 +// CHECK3-NEXT: [[CONV:%.*]] = sext i8 [[TMP5]] to i32 // CHECK3-NEXT: [[XOR:%.*]] = xor i32 [[CONV]], 2 -// CHECK3-NEXT: [[CONV3:%.*]] = trunc i32 [[XOR]] to i8 -// CHECK3-NEXT: store i8 [[CONV3]], i8* [[C1]], align 1 -// CHECK3-NEXT: [[TMP3:%.*]] = load float, float* [[D_ON_STACK]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = fmul float [[TMP3]], 3.300000e+01 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i32 [[XOR]] to i8 +// CHECK3-NEXT: store i8 [[CONV1]], i8* [[C]], align 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load float, float* [[D_ON_STACK]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = fmul float [[TMP6]], 3.300000e+01 // CHECK3-NEXT: store float [[MUL]], float* [[D_ON_STACK]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store i8* [[C1]], i8** [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP8:%.*]] = bitcast float* [[D_ON_STACK]] to i8* -// CHECK3-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i8* [[TMP10]], i32 2048, i8* [[TMP9]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func3, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func4, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func5, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func6, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func7, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func8) -// CHECK3-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP11]], 1 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store i8* [[C]], i8** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP11:%.*]] = bitcast float* [[D_ON_STACK]] to i8* +// CHECK3-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP13:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i8* [[TMP13]], i32 2048, i8* [[TMP12]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func3, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func4, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func5, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func6, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func7, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func8) +// CHECK3-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 1 +// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK3: .omp.reduction.then: -// CHECK3-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP0]], align 1 -// CHECK3-NEXT: [[CONV4:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK3-NEXT: [[TMP14:%.*]] = load i8, i8* [[C1]], align 1 -// CHECK3-NEXT: [[CONV5:%.*]] = sext i8 [[TMP14]] to i32 -// CHECK3-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]] -// CHECK3-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8 -// CHECK3-NEXT: store i8 [[CONV7]], i8* [[TMP0]], align 1 -// CHECK3-NEXT: [[TMP15:%.*]] = load float, float* [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load float, float* [[D_ON_STACK]], align 4 -// CHECK3-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP16]] -// CHECK3-NEXT: store float [[MUL8]], float* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i8, i8* [[TMP2]], align 1 +// CHECK3-NEXT: [[CONV2:%.*]] = sext i8 [[TMP16]] to i32 +// CHECK3-NEXT: [[TMP17:%.*]] = load i8, i8* [[C]], align 1 +// CHECK3-NEXT: [[CONV3:%.*]] = sext i8 [[TMP17]] to i32 +// CHECK3-NEXT: [[XOR4:%.*]] = xor i32 [[CONV2]], [[CONV3]] +// CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[XOR4]] to i8 +// CHECK3-NEXT: store i8 [[CONV5]], i8* [[TMP2]], align 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load float, float* [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load float, float* [[D_ON_STACK]], align 4 +// CHECK3-NEXT: [[MUL6:%.*]] = fmul float [[TMP18]], [[TMP19]] +// CHECK3-NEXT: store float [[MUL6]], float* [[TMP4]], align 4 +// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP8]]) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK3: .omp.reduction.done: -// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[D2]], i32 4) -// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[C1]], i32 1) +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[D]], i32 4) +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[C]], i32 1) // CHECK3-NEXT: ret void // // @@ -3264,18 +3321,18 @@ // CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* // CHECK3-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.0* +// CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.1* // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 -// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 0 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2048 x i8], [2048 x i8]* [[C]], i32 0, i32 [[TMP7]] // CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP9]], align 1 // CHECK3-NEXT: store i8 [[TMP11]], i8* [[TMP10]], align 128 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 // CHECK3-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 4 // CHECK3-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to float* -// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 1 +// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 1 // CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2048 x float], [2048 x float]* [[D]], i32 0, i32 [[TMP7]] // CHECK3-NEXT: [[TMP16:%.*]] = load float, float* [[TMP14]], align 4 // CHECK3-NEXT: store float [[TMP16]], float* [[TMP15]], align 128 @@ -3293,14 +3350,14 @@ // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK3-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.0* +// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.1* // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x i8], [2048 x i8]* [[C]], i32 0, i32 [[TMP5]] // CHECK3-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 1 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2048 x float], [2048 x float]* [[D]], i32 0, i32 [[TMP5]] // CHECK3-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to i8* // CHECK3-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 4 @@ -3322,18 +3379,18 @@ // CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* // CHECK3-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.0* +// CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.1* // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 -// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 0 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2048 x i8], [2048 x i8]* [[C]], i32 0, i32 [[TMP7]] // CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP10]], align 128 // CHECK3-NEXT: store i8 [[TMP11]], i8* [[TMP9]], align 1 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 // CHECK3-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 4 // CHECK3-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to float* -// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 1 +// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 1 // CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2048 x float], [2048 x float]* [[D]], i32 0, i32 [[TMP7]] // CHECK3-NEXT: [[TMP16:%.*]] = load float, float* [[TMP15]], align 128 // CHECK3-NEXT: store float [[TMP16]], float* [[TMP14]], align 4 @@ -3351,14 +3408,14 @@ // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK3-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.0* +// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.1* // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x i8], [2048 x i8]* [[C]], i32 0, i32 [[TMP5]] // CHECK3-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 1 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2048 x float], [2048 x float]* [[D]], i32 0, i32 [[TMP5]] // CHECK3-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to i8* // CHECK3-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 4 @@ -3373,6 +3430,7 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 @@ -3383,9 +3441,13 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[A_ADDR]], i32** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i16* [[CONV]], i16** [[TMP3]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[A_ADDR]], i16* [[CONV]]) #[[ATTR4]] +// CHECK3-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -3393,139 +3455,146 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__9 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 4 -// CHECK3-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B2:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 +// CHECK3-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_3]], align 8 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK3-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[A1]], align 4 -// CHECK3-NEXT: store i16 -32768, i16* [[B2]], align 2 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = bitcast i32* [[A1]] to i8* -// CHECK3-NEXT: store i8* [[TMP3]], i8** [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i16* [[B2]] to i8* -// CHECK3-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*)* @__omp_outlined__10 to i8*), i8* null, i8** [[TMP8]], i32 2) -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = bitcast i32* [[A1]] to i8* -// CHECK3-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP12:%.*]] = bitcast i16* [[B2]] to i8* -// CHECK3-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP14:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i8* [[TMP14]], i32 2048, i8* [[TMP13]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func15, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func16, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func17, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func18, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func19, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func20) -// CHECK3-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 1 -// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK3-NEXT: store i16 -32768, i16* [[B]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[A]], i32** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i16* [[B]], i16** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = bitcast %struct.anon.3* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK3-NEXT: [[TMP8:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK3-NEXT: [[TMP9:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP7]], i8* [[TMP8]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP9]] to %struct.anon.3* +// CHECK3-NEXT: store [[STRUCT_ANON_3]] [[TMP10]], %struct.anon.3* [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.3*)* @__omp_outlined__10 to i8*), i8* null, i8* [[TMP9]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[TMP8]], i32 8) +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP15:%.*]] = bitcast i32* [[A]] to i8* +// CHECK3-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP17:%.*]] = bitcast i16* [[B]] to i8* +// CHECK3-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP19:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], i8* [[TMP19]], i32 2048, i8* [[TMP18]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func15, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func16, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func17, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func18, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func19, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func20) +// CHECK3-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP20]], 1 +// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK3: .omp.reduction.then: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK3-NEXT: [[OR:%.*]] = or i32 [[TMP17]], [[TMP18]] -// CHECK3-NEXT: store i32 [[OR]], i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP19]] to i32 -// CHECK3-NEXT: [[TMP20:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP20]] to i32 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV3]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: [[OR:%.*]] = or i32 [[TMP22]], [[TMP23]] +// CHECK3-NEXT: store i32 [[OR]], i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i16, i16* [[TMP4]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP24]] to i32 +// CHECK3-NEXT: [[TMP25:%.*]] = load i16, i16* [[B]], align 2 +// CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP25]] to i32 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV1]] // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK3-NEXT: [[TMP26:%.*]] = load i16, i16* [[TMP4]], align 2 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP22:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK3-NEXT: [[TMP27:%.*]] = load i16, i16* [[B]], align 2 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i16 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] -// CHECK3-NEXT: store i16 [[COND]], i16* [[TMP1]], align 2 -// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP7]]) +// CHECK3-NEXT: [[COND:%.*]] = phi i16 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i16 [[COND]], i16* [[TMP4]], align 2 +// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP13]]) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK3: .omp.reduction.done: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__10 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 4 -// CHECK3-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B2:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK3-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[A1]], align 4 -// CHECK3-NEXT: store i16 -32768, i16* [[B2]], align 2 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK3-NEXT: [[OR:%.*]] = or i32 [[TMP2]], 1 -// CHECK3-NEXT: store i32 [[OR]], i32* [[A1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK3-NEXT: store i16 -32768, i16* [[B]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: [[OR:%.*]] = or i32 [[TMP5]], 1 +// CHECK3-NEXT: store i32 [[OR]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, i16* [[B]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 // CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 99, [[CONV]] // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[B]], align 2 +// CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP7]] to i32 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16 -// CHECK3-NEXT: store i16 [[CONV4]], i16* [[B2]], align 2 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i32* [[A1]] to i8* -// CHECK3-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP10:%.*]] = bitcast i16* [[B2]] to i8* -// CHECK3-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 2, i32 8, i8* [[TMP11]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func12, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func13) -// CHECK3-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV1]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[CONV2:%.*]] = trunc i32 [[COND]] to i16 +// CHECK3-NEXT: store i16 [[CONV2]], i16* [[B]], align 2 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = bitcast i32* [[A]] to i8* +// CHECK3-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP13:%.*]] = bitcast i16* [[B]] to i8* +// CHECK3-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], i32 2, i32 8, i8* [[TMP14]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func12, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func13) +// CHECK3-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 1 +// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK3: .omp.reduction.then: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK3-NEXT: [[OR5:%.*]] = or i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: store i32 [[OR5]], i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK3-NEXT: [[CONV6:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK3-NEXT: [[TMP17:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK3-NEXT: [[CONV7:%.*]] = sext i16 [[TMP17]] to i32 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] -// CHECK3: cond.true9: -// CHECK3-NEXT: [[TMP18:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK3-NEXT: br label [[COND_END11:%.*]] -// CHECK3: cond.false10: -// CHECK3-NEXT: [[TMP19:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK3-NEXT: br label [[COND_END11]] -// CHECK3: cond.end11: -// CHECK3-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE9]] ], [ [[TMP19]], [[COND_FALSE10]] ] -// CHECK3-NEXT: store i16 [[COND12]], i16* [[TMP1]], align 2 -// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: [[OR3:%.*]] = or i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: store i32 [[OR3]], i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i16, i16* [[TMP4]], align 2 +// CHECK3-NEXT: [[CONV4:%.*]] = sext i16 [[TMP19]] to i32 +// CHECK3-NEXT: [[TMP20:%.*]] = load i16, i16* [[B]], align 2 +// CHECK3-NEXT: [[CONV5:%.*]] = sext i16 [[TMP20]] to i32 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[CONV4]], [[CONV5]] +// CHECK3-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] +// CHECK3: cond.true7: +// CHECK3-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP4]], align 2 +// CHECK3-NEXT: br label [[COND_END9:%.*]] +// CHECK3: cond.false8: +// CHECK3-NEXT: [[TMP22:%.*]] = load i16, i16* [[B]], align 2 +// CHECK3-NEXT: br label [[COND_END9]] +// CHECK3: cond.end9: +// CHECK3-NEXT: [[COND10:%.*]] = phi i16 [ [[TMP21]], [[COND_TRUE7]] ], [ [[TMP22]], [[COND_FALSE8]] ] +// CHECK3-NEXT: store i16 [[COND10]], i16* [[TMP4]], align 2 +// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP9]]) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK3: .omp.reduction.done: // CHECK3-NEXT: ret void @@ -3899,19 +3968,19 @@ // CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* // CHECK3-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.1* +// CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.4* // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 0 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* [[A]], i32 0, i32 [[TMP7]] // CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 // CHECK3-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 128 // CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 // CHECK3-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 4 // CHECK3-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i16* -// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 1 +// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 1 // CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2048 x i16], [2048 x i16]* [[B]], i32 0, i32 [[TMP7]] // CHECK3-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP15]], align 2 // CHECK3-NEXT: store i16 [[TMP17]], i16* [[TMP16]], align 128 @@ -3929,15 +3998,15 @@ // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK3-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.1* +// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.4* // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* [[A]], i32 0, i32 [[TMP5]] // CHECK3-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8* // CHECK3-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 1 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2048 x i16], [2048 x i16]* [[B]], i32 0, i32 [[TMP5]] // CHECK3-NEXT: [[TMP11:%.*]] = bitcast i16* [[TMP10]] to i8* // CHECK3-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 4 @@ -3959,19 +4028,19 @@ // CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* // CHECK3-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.1* +// CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.4* // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 0 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* [[A]], i32 0, i32 [[TMP7]] // CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 128 // CHECK3-NEXT: store i32 [[TMP12]], i32* [[TMP10]], align 4 // CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 // CHECK3-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 4 // CHECK3-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i16* -// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 1 +// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 1 // CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2048 x i16], [2048 x i16]* [[B]], i32 0, i32 [[TMP7]] // CHECK3-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP16]], align 128 // CHECK3-NEXT: store i16 [[TMP17]], i16* [[TMP15]], align 2 @@ -3989,15 +4058,15 @@ // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK3-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.1* +// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.4* // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* [[A]], i32 0, i32 [[TMP5]] // CHECK3-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8* // CHECK3-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 1 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2048 x i16], [2048 x i16]* [[B]], i32 0, i32 [[TMP5]] // CHECK3-NEXT: [[TMP11:%.*]] = bitcast i16* [[TMP10]] to i8* // CHECK3-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 4 diff --git a/clang/test/OpenMP/openmp_win_codegen.cpp b/clang/test/OpenMP/openmp_win_codegen.cpp --- a/clang/test/OpenMP/openmp_win_codegen.cpp +++ b/clang/test/OpenMP/openmp_win_codegen.cpp @@ -49,34 +49,38 @@ // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void @"?main@Test@@SAXXZ"() -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) { // CHECK1-NEXT: entry: +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[T:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: invoke void @"?foo@@YAXXZ"() // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[CATCH_DISPATCH:%.*]] // CHECK1: catch.dispatch: -// CHECK1-NEXT: [[TMP0:%.*]] = catchswitch within none [label %catch] unwind label [[TERMINATE:%.*]] +// CHECK1-NEXT: [[TMP1:%.*]] = catchswitch within none [label %catch] unwind label [[TERMINATE:%.*]] // CHECK1: catch: -// CHECK1-NEXT: [[TMP1:%.*]] = catchpad within [[TMP0]] [%rtti.TypeDescriptor2* @"??_R0H@8", i32 0, i32* %t] -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], [8 x i32]* @.gomp_critical_user_.var) [ "funclet"(token [[TMP1]]) ] -// CHECK1-NEXT: invoke void @"?bar@@YAXXZ"() [ "funclet"(token [[TMP1]]) ] +// CHECK1-NEXT: [[TMP2:%.*]] = catchpad within [[TMP1]] [%rtti.TypeDescriptor2* @"??_R0H@8", i32 0, i32* %t] +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.var) [ "funclet"(token [[TMP2]]) ] +// CHECK1-NEXT: invoke void @"?bar@@YAXXZ"() [ "funclet"(token [[TMP2]]) ] // CHECK1-NEXT: to label [[INVOKE_CONT1:%.*]] unwind label [[TERMINATE2:%.*]] // CHECK1: invoke.cont1: -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], [8 x i32]* @.gomp_critical_user_.var) [ "funclet"(token [[TMP1]]) ] -// CHECK1-NEXT: catchret from [[TMP1]] to label [[CATCHRET_DEST:%.*]] +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.var) [ "funclet"(token [[TMP2]]) ] +// CHECK1-NEXT: catchret from [[TMP2]] to label [[CATCHRET_DEST:%.*]] // CHECK1: catchret.dest: // CHECK1-NEXT: br label [[TRY_CONT:%.*]] // CHECK1: try.cont: @@ -84,49 +88,51 @@ // CHECK1: invoke.cont: // CHECK1-NEXT: br label [[TRY_CONT]] // CHECK1: terminate: -// CHECK1-NEXT: [[TMP4:%.*]] = cleanuppad within none [] -// CHECK1-NEXT: call void @"?terminate@@YAXXZ"() #[[ATTR7:[0-9]+]] [ "funclet"(token [[TMP4]]) ] +// CHECK1-NEXT: [[TMP5:%.*]] = cleanuppad within none [] +// CHECK1-NEXT: call void @"?terminate@@YAXXZ"() #[[ATTR7:[0-9]+]] [ "funclet"(token [[TMP5]]) ] // CHECK1-NEXT: unreachable // CHECK1: terminate2: -// CHECK1-NEXT: [[TMP5:%.*]] = cleanuppad within [[TMP1]] [] -// CHECK1-NEXT: call void @"?terminate@@YAXXZ"() #[[ATTR7]] [ "funclet"(token [[TMP5]]) ] +// CHECK1-NEXT: [[TMP6:%.*]] = cleanuppad within [[TMP2]] [] +// CHECK1-NEXT: call void @"?terminate@@YAXXZ"() #[[ATTR7]] [ "funclet"(token [[TMP6]]) ] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[J:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[J_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[LOCAL_J:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x i8*], align 8 -// CHECK1-NEXT: store i32* [[J]], i32** [[J_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[J_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 // CHECK1-NEXT: store i32 3, i32* [[LOCAL_J]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: // CHECK1-NEXT: store i32 4, i32* [[LOCAL_J]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i32* [[LOCAL_J]] to i8* -// CHECK1-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8* -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK1-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i64 8, i8* [[TMP7]], void (i8*, i8*)* @.omp.copyprivate.copy_func, i32 [[TMP8]]) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[LOCAL_J]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i32* [[LOCAL_J]] to i8* +// CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8* +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK1-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i64 8, i8* [[TMP9]], void (i8*, i8*)* @.omp.copyprivate.copy_func, i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[LOCAL_J]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[TMP2]], align 4 // CHECK1-NEXT: ret void // // diff --git a/clang/test/OpenMP/parallel_codegen.cpp b/clang/test/OpenMP/parallel_codegen.cpp --- a/clang/test/OpenMP/parallel_codegen.cpp +++ b/clang/test/OpenMP/parallel_codegen.cpp @@ -78,6 +78,9 @@ // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 @@ -87,45 +90,56 @@ // CHECK1-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8 // CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16 // CHECK1-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]], i32* [[VLA]]) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP1]]) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP1]], i32* [[VLA]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIPPcEiT_(i8** noundef [[TMP3]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[VLA]], i32** [[TMP4]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP5]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]]) +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[VLA]], i32** [[TMP7]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_2]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIPPcEiT_(i8** noundef [[TMP8]]) // CHECK1-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP4]]) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP5]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP9]]) +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP10]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP2]]) +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i64 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP5]]) // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* @global, align 4 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1 -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[ARRAYIDX1]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* @global, align 4 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i64 1 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[ARRAYIDX1]], align 4 // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP4:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP7:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP5]]) #[[ATTR6:[0-9]+]] +// CHECK1-NEXT: [[TMP8:%.*]] = extractvalue { i8*, i32 } [[TMP7]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP8]]) #[[ATTR6:[0-9]+]] // CHECK1-NEXT: unreachable // // @@ -145,105 +159,121 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[GLOBAL:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i8* @llvm.stacksave() -// CHECK1-NEXT: store i8* [[TMP1]], i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA1:%.*]] = alloca i32, i64 [[TMP0]], align 16 -// CHECK1-NEXT: store i64 [[TMP0]], i64* [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP0]], i32* [[VLA1]], i32* [[GLOBAL]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP2]]) +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = call i8* @llvm.stacksave() +// CHECK1-NEXT: store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP2]], align 16 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[VLA]], i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[GLOBAL]], i32** [[TMP6]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP7]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[GLOBAL:%.*]]) #[[ATTR2]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[GLOBAL]], i32** [[GLOBAL_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[GLOBAL_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP3]]) +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i64 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP7]]) // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[ARRAYIDX1]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i64 1 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX1]], align 4 // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP5:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP9:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP6]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP10:%.*]] = extractvalue { i8*, i32 } [[TMP9]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP10]]) #[[ATTR6]] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i64 [[TMP0]], i32* [[TMP1]]) +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[TMP4]], i32** [[TMP6]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP2]]) +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i64 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP5]]) // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* @global, align 4 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1 -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[ARRAYIDX1]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* @global, align 4 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i64 1 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[ARRAYIDX1]], align 4 // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP4:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP7:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP5]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP8:%.*]] = extractvalue { i8*, i32 } [[TMP7]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP8]]) #[[ATTR6]] // CHECK1-NEXT: unreachable // // @@ -251,6 +281,7 @@ // CHECK1-SAME: (i8** noundef [[ARGC:%.*]]) #[[ATTR3]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 8 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP0]], i64 0 @@ -258,38 +289,43 @@ // CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i64 0 // CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[ARRAYIDX1]], align 1 // CHECK1-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i8*** [[ARGC_ADDR]], i8**** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP3]], i64* [[TMP5]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8*** noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR2]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK1-NEXT: [[VAR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[TMP0]], align 8 -// CHECK1-NEXT: invoke void @_Z3fooIPPcEvT_(i8** noundef [[TMP2]]) +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8***, i8**** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8**, i8*** [[TMP2]], align 8 +// CHECK1-NEXT: invoke void @_Z3fooIPPcEvT_(i8** noundef [[TMP5]]) // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: -// CHECK1-NEXT: [[TMP3:%.*]] = load double*, double** [[VAR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = mul nsw i64 0, [[TMP1]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP3]], i64 [[TMP4]] +// CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[VAR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = mul nsw i64 0, [[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP7]] // CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX]], i64 0 // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP5:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP8:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP6]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP9:%.*]] = extractvalue { i8*, i32 } [[TMP8]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP9]]) #[[ATTR6]] // CHECK1-NEXT: unreachable // // @@ -309,6 +345,9 @@ // CHECK2-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 // CHECK2-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 // CHECK2-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK2-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK2-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 // CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32* [[ARGC_ADDR]], metadata [[META18:![0-9]+]], metadata !DIExpression()), !dbg [[DBG19:![0-9]+]] @@ -322,59 +361,69 @@ // CHECK2-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8, !dbg [[DBG23]] // CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[__VLA_EXPR0]], metadata [[META24:![0-9]+]], metadata !DIExpression()), !dbg [[DBG26:![0-9]+]] // CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32* [[VLA]], metadata [[META27:![0-9]+]], metadata !DIExpression()), !dbg [[DBG31:![0-9]+]] -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]], i32* [[VLA]]), !dbg [[DBG32:![0-9]+]] -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB5:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i64 [[TMP1]]), !dbg [[DBG33:![0-9]+]] -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB9:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i64 [[TMP1]], i32* [[VLA]]), !dbg [[DBG34:![0-9]+]] -// CHECK2-NEXT: [[TMP3:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8, !dbg [[DBG35:![0-9]+]] -// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIPPcEiT_(i8** noundef [[TMP3]]), !dbg [[DBG36:![0-9]+]] +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG32:![0-9]+]] +// CHECK2-NEXT: store i64 [[TMP1]], i64* [[TMP3]], align 8, !dbg [[DBG32]] +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG32]] +// CHECK2-NEXT: store i32* [[VLA]], i32** [[TMP4]], align 8, !dbg [[DBG32]] +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG32]] +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0, !dbg [[DBG33:![0-9]+]] +// CHECK2-NEXT: store i64 [[TMP1]], i64* [[TMP5]], align 8, !dbg [[DBG33]] +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB5:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]]), !dbg [[DBG33]] +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 0, !dbg [[DBG34:![0-9]+]] +// CHECK2-NEXT: store i64 [[TMP1]], i64* [[TMP6]], align 8, !dbg [[DBG34]] +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 1, !dbg [[DBG34]] +// CHECK2-NEXT: store i32* [[VLA]], i32** [[TMP7]], align 8, !dbg [[DBG34]] +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB9:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_2]]), !dbg [[DBG34]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8, !dbg [[DBG35:![0-9]+]] +// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIPPcEiT_(i8** noundef [[TMP8]]), !dbg [[DBG36:![0-9]+]] // CHECK2-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4, !dbg [[DBG37:![0-9]+]] -// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8, !dbg [[DBG38:![0-9]+]] -// CHECK2-NEXT: call void @llvm.stackrestore(i8* [[TMP4]]), !dbg [[DBG38]] -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[RETVAL]], align 4, !dbg [[DBG38]] -// CHECK2-NEXT: ret i32 [[TMP5]], !dbg [[DBG38]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8, !dbg [[DBG38:![0-9]+]] +// CHECK2-NEXT: call void @llvm.stackrestore(i8* [[TMP9]]), !dbg [[DBG38]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[RETVAL]], align 4, !dbg [[DBG38]] +// CHECK2-NEXT: ret i32 [[TMP10]], !dbg [[DBG38]] // // -// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined._debug__ -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG39:![0-9]+]] { +// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG39:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META47:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META50:![0-9]+]], metadata !DIExpression()), !dbg [[DBG51:![0-9]+]] // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META49:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48]] -// CHECK2-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META50:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48]] -// CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META51:![0-9]+]], metadata !DIExpression()), !dbg [[DBG52:![0-9]+]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG53:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG53]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1, !dbg [[DBG54:![0-9]+]] -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !dbg [[DBG54]] -// CHECK2-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP2]]) -// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG53]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META52:![0-9]+]], metadata !DIExpression()), !dbg [[DBG51]] +// CHECK2-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata %struct.anon** [[__CONTEXT_ADDR]], metadata [[META53:![0-9]+]], metadata !DIExpression()), !dbg [[DBG51]] +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8, !dbg [[DBG54:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0, !dbg [[DBG54]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8, !dbg [[DBG54]] +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1, !dbg [[DBG54]] +// CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !dbg [[DBG54]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i64 1, !dbg [[DBG55:![0-9]+]] +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !dbg [[DBG55]] +// CHECK2-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP5]]) +// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG56:![0-9]+]] // CHECK2: invoke.cont: -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* @global, align 4, !dbg [[DBG55:![0-9]+]] -// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1, !dbg [[DBG56:![0-9]+]] -// CHECK2-NEXT: store i32 [[TMP3]], i32* [[ARRAYIDX1]], align 4, !dbg [[DBG57:![0-9]+]] -// CHECK2-NEXT: ret void, !dbg [[DBG55]] +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* @global, align 4, !dbg [[DBG57:![0-9]+]] +// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i64 1, !dbg [[DBG58:![0-9]+]] +// CHECK2-NEXT: store i32 [[TMP6]], i32* [[ARRAYIDX1]], align 4, !dbg [[DBG59:![0-9]+]] +// CHECK2-NEXT: ret void, !dbg [[DBG57]] // CHECK2: terminate.lpad: -// CHECK2-NEXT: [[TMP4:%.*]] = landingpad { i8*, i32 } -// CHECK2-NEXT: catch i8* null, !dbg [[DBG53]] -// CHECK2-NEXT: [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0, !dbg [[DBG53]] -// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP5]]) #[[ATTR7:[0-9]+]], !dbg [[DBG53]] -// CHECK2-NEXT: unreachable, !dbg [[DBG53]] +// CHECK2-NEXT: [[TMP7:%.*]] = landingpad { i8*, i32 } +// CHECK2-NEXT: catch i8* null, !dbg [[DBG56]] +// CHECK2-NEXT: [[TMP8:%.*]] = extractvalue { i8*, i32 } [[TMP7]], 0, !dbg [[DBG56]] +// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP8]]) #[[ATTR7:[0-9]+]], !dbg [[DBG56]] +// CHECK2-NEXT: unreachable, !dbg [[DBG56]] // // // CHECK2-LABEL: define {{[^@]+}}@_Z3fooIiEvT_ -// CHECK2-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat !dbg [[DBG58:![0-9]+]] { +// CHECK2-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat !dbg [[DBG60:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32* [[ARGC_ADDR]], metadata [[META63:![0-9]+]], metadata !DIExpression()), !dbg [[DBG64:![0-9]+]] -// CHECK2-NEXT: ret void, !dbg [[DBG65:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32* [[ARGC_ADDR]], metadata [[META65:![0-9]+]], metadata !DIExpression()), !dbg [[DBG66:![0-9]+]] +// CHECK2-NEXT: ret void, !dbg [[DBG67:![0-9]+]] // // // CHECK2-LABEL: define {{[^@]+}}@__clang_call_terminate @@ -384,331 +433,204 @@ // CHECK2-NEXT: unreachable // // -// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] !dbg [[DBG66:![0-9]+]] { +// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..1 +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG70:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META67:![0-9]+]], metadata !DIExpression()), !dbg [[DBG68:![0-9]+]] -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META69:![0-9]+]], metadata !DIExpression()), !dbg [[DBG68]] -// CHECK2-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG68]] -// CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META71:![0-9]+]], metadata !DIExpression()), !dbg [[DBG68]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG72:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG72]] -// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG72]] -// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG72]] -// CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG72]] -// CHECK2-NEXT: call void @.omp_outlined._debug__(i32* [[TMP2]], i32* [[TMP3]], i64 [[TMP0]], i32* [[TMP4]]) #[[ATTR6]], !dbg [[DBG72]] -// CHECK2-NEXT: ret void, !dbg [[DBG72]] -// -// -// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined._debug__.1 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR3]] !dbg [[DBG75:![0-9]+]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK2-NEXT: [[GLOBAL:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 // CHECK2-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META78:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META77:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78:![0-9]+]] // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META80:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]] -// CHECK2-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META81:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG82:![0-9]+]] -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32* [[GLOBAL]], metadata [[META83:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]] -// CHECK2-NEXT: [[TMP1:%.*]] = call i8* @llvm.stacksave(), !dbg [[DBG82]] -// CHECK2-NEXT: store i8* [[TMP1]], i8** [[SAVED_STACK]], align 8, !dbg [[DBG82]] -// CHECK2-NEXT: [[VLA1:%.*]] = alloca i32, i64 [[TMP0]], align 16, !dbg [[DBG82]] -// CHECK2-NEXT: store i64 [[TMP0]], i64* [[__VLA_EXPR0]], align 8, !dbg [[DBG82]] -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[__VLA_EXPR0]], metadata [[META84:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]] -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32* [[VLA1]], metadata [[META85:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]] -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP0]], i32* [[VLA1]], i32* [[GLOBAL]]), !dbg [[DBG82]] -// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8, !dbg [[DBG86:![0-9]+]] -// CHECK2-NEXT: call void @llvm.stackrestore(i8* [[TMP2]]), !dbg [[DBG86]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META79:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78]] +// CHECK2-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata %struct.anon.0** [[__CONTEXT_ADDR]], metadata [[META80:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78]] +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8, !dbg [[DBG81:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0, !dbg [[DBG81]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8, !dbg [[DBG81]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32* [[GLOBAL]], metadata [[META82:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78]] +// CHECK2-NEXT: [[TMP3:%.*]] = call i8* @llvm.stacksave(), !dbg [[DBG81]] +// CHECK2-NEXT: store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8, !dbg [[DBG81]] +// CHECK2-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP2]], align 16, !dbg [[DBG81]] +// CHECK2-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8, !dbg [[DBG81]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[__VLA_EXPR0]], metadata [[META83:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32* [[VLA]], metadata [[META84:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78]] +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG85:![0-9]+]] +// CHECK2-NEXT: store i64 [[TMP2]], i64* [[TMP4]], align 8, !dbg [[DBG85]] +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG85]] +// CHECK2-NEXT: store i32* [[VLA]], i32** [[TMP5]], align 8, !dbg [[DBG85]] +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG85]] +// CHECK2-NEXT: store i32* [[GLOBAL]], i32** [[TMP6]], align 8, !dbg [[DBG85]] +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG85]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8, !dbg [[DBG86:![0-9]+]] +// CHECK2-NEXT: call void @llvm.stackrestore(i8* [[TMP7]]), !dbg [[DBG86]] // CHECK2-NEXT: ret void, !dbg [[DBG88:![0-9]+]] // // -// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined._debug__.2 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[GLOBAL:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG89:![0-9]+]] { +// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..2 +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG89:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META92:![0-9]+]], metadata !DIExpression()), !dbg [[DBG93:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META96:![0-9]+]], metadata !DIExpression()), !dbg [[DBG97:![0-9]+]] // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META94:![0-9]+]], metadata !DIExpression()), !dbg [[DBG93]] -// CHECK2-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META95:![0-9]+]], metadata !DIExpression()), !dbg [[DBG93]] -// CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META96:![0-9]+]], metadata !DIExpression()), !dbg [[DBG97:![0-9]+]] -// CHECK2-NEXT: store i32* [[GLOBAL]], i32** [[GLOBAL_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[GLOBAL_ADDR]], metadata [[META98:![0-9]+]], metadata !DIExpression()), !dbg [[DBG99:![0-9]+]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG100:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG100]] -// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[GLOBAL_ADDR]], align 8, !dbg [[DBG100]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1, !dbg [[DBG101:![0-9]+]] -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !dbg [[DBG101]] -// CHECK2-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP3]]) -// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG100]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META98:![0-9]+]], metadata !DIExpression()), !dbg [[DBG97]] +// CHECK2-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata %struct.anon.1** [[__CONTEXT_ADDR]], metadata [[META99:![0-9]+]], metadata !DIExpression()), !dbg [[DBG97]] +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8, !dbg [[DBG100:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0, !dbg [[DBG100]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8, !dbg [[DBG100]] +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1, !dbg [[DBG100]] +// CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !dbg [[DBG100]] +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2, !dbg [[DBG100]] +// CHECK2-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8, !dbg [[DBG100]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i64 1, !dbg [[DBG101:![0-9]+]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !dbg [[DBG101]] +// CHECK2-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP7]]) +// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG102:![0-9]+]] // CHECK2: invoke.cont: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 4, !dbg [[DBG102:![0-9]+]] -// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1, !dbg [[DBG103:![0-9]+]] -// CHECK2-NEXT: store i32 [[TMP4]], i32* [[ARRAYIDX1]], align 4, !dbg [[DBG104:![0-9]+]] -// CHECK2-NEXT: ret void, !dbg [[DBG102]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP6]], align 4, !dbg [[DBG103:![0-9]+]] +// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i64 1, !dbg [[DBG104:![0-9]+]] +// CHECK2-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX1]], align 4, !dbg [[DBG105:![0-9]+]] +// CHECK2-NEXT: ret void, !dbg [[DBG103]] // CHECK2: terminate.lpad: -// CHECK2-NEXT: [[TMP5:%.*]] = landingpad { i8*, i32 } -// CHECK2-NEXT: catch i8* null, !dbg [[DBG100]] -// CHECK2-NEXT: [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 0, !dbg [[DBG100]] -// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP6]]) #[[ATTR7]], !dbg [[DBG100]] -// CHECK2-NEXT: unreachable, !dbg [[DBG100]] +// CHECK2-NEXT: [[TMP9:%.*]] = landingpad { i8*, i32 } +// CHECK2-NEXT: catch i8* null, !dbg [[DBG102]] +// CHECK2-NEXT: [[TMP10:%.*]] = extractvalue { i8*, i32 } [[TMP9]], 0, !dbg [[DBG102]] +// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP10]]) #[[ATTR7]], !dbg [[DBG102]] +// CHECK2-NEXT: unreachable, !dbg [[DBG102]] // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[GLOBAL:%.*]]) #[[ATTR3]] !dbg [[DBG105:![0-9]+]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG106:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META106:![0-9]+]], metadata !DIExpression()), !dbg [[DBG107:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META113:![0-9]+]], metadata !DIExpression()), !dbg [[DBG114:![0-9]+]] // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META108:![0-9]+]], metadata !DIExpression()), !dbg [[DBG107]] -// CHECK2-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META109:![0-9]+]], metadata !DIExpression()), !dbg [[DBG107]] -// CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META110:![0-9]+]], metadata !DIExpression()), !dbg [[DBG107]] -// CHECK2-NEXT: store i32* [[GLOBAL]], i32** [[GLOBAL_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[GLOBAL_ADDR]], metadata [[META111:![0-9]+]], metadata !DIExpression()), !dbg [[DBG107]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG112:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG112]] -// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[GLOBAL_ADDR]], align 8, !dbg [[DBG112]] -// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG112]] -// CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG112]] -// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG112]] -// CHECK2-NEXT: [[TMP6:%.*]] = load i32*, i32** [[GLOBAL_ADDR]], align 8, !dbg [[DBG112]] -// CHECK2-NEXT: call void @.omp_outlined._debug__.2(i32* [[TMP3]], i32* [[TMP4]], i64 [[TMP0]], i32* [[TMP5]], i32* [[TMP6]]) #[[ATTR6]], !dbg [[DBG112]] -// CHECK2-NEXT: ret void, !dbg [[DBG112]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META115:![0-9]+]], metadata !DIExpression()), !dbg [[DBG114]] +// CHECK2-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata %struct.anon.2** [[__CONTEXT_ADDR]], metadata [[META116:![0-9]+]], metadata !DIExpression()), !dbg [[DBG114]] +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8, !dbg [[DBG117:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0, !dbg [[DBG117]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8, !dbg [[DBG117]] +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1, !dbg [[DBG117]] +// CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !dbg [[DBG117]] +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG118:![0-9]+]] +// CHECK2-NEXT: store i64 [[TMP2]], i64* [[TMP5]], align 8, !dbg [[DBG118]] +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG118]] +// CHECK2-NEXT: store i32* [[TMP4]], i32** [[TMP6]], align 8, !dbg [[DBG118]] +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB7:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG118]] +// CHECK2-NEXT: ret void, !dbg [[DBG119:![0-9]+]] // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR3]] !dbg [[DBG113:![0-9]+]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META114:![0-9]+]], metadata !DIExpression()), !dbg [[DBG115:![0-9]+]] -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META116:![0-9]+]], metadata !DIExpression()), !dbg [[DBG115]] -// CHECK2-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META117:![0-9]+]], metadata !DIExpression()), !dbg [[DBG115]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG118:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG118]] -// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG118]] -// CHECK2-NEXT: call void @.omp_outlined._debug__.1(i32* [[TMP1]], i32* [[TMP2]], i64 [[TMP0]]) #[[ATTR6]], !dbg [[DBG118]] -// CHECK2-NEXT: ret void, !dbg [[DBG118]] -// -// -// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined._debug__.5 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] !dbg [[DBG119:![0-9]+]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG120:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META120:![0-9]+]], metadata !DIExpression()), !dbg [[DBG121:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META127:![0-9]+]], metadata !DIExpression()), !dbg [[DBG128:![0-9]+]] // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META122:![0-9]+]], metadata !DIExpression()), !dbg [[DBG121]] -// CHECK2-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META123:![0-9]+]], metadata !DIExpression()), !dbg [[DBG121]] -// CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META124:![0-9]+]], metadata !DIExpression()), !dbg [[DBG125:![0-9]+]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG126:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG126]] -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB7:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP0]], i32* [[TMP1]]), !dbg [[DBG126]] -// CHECK2-NEXT: ret void, !dbg [[DBG127:![0-9]+]] -// -// -// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined._debug__.6 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG128:![0-9]+]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META129:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130:![0-9]+]] -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META131:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130]] -// CHECK2-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META132:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130]] -// CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META133:![0-9]+]], metadata !DIExpression()), !dbg [[DBG134:![0-9]+]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG135:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG135]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1, !dbg [[DBG136:![0-9]+]] -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !dbg [[DBG136]] -// CHECK2-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP2]]) -// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG135]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META129:![0-9]+]], metadata !DIExpression()), !dbg [[DBG128]] +// CHECK2-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata %struct.anon.3** [[__CONTEXT_ADDR]], metadata [[META130:![0-9]+]], metadata !DIExpression()), !dbg [[DBG128]] +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8, !dbg [[DBG131:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0, !dbg [[DBG131]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8, !dbg [[DBG131]] +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1, !dbg [[DBG131]] +// CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !dbg [[DBG131]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i64 1, !dbg [[DBG132:![0-9]+]] +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !dbg [[DBG132]] +// CHECK2-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP5]]) +// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG133:![0-9]+]] // CHECK2: invoke.cont: -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* @global, align 4, !dbg [[DBG137:![0-9]+]] -// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1, !dbg [[DBG138:![0-9]+]] -// CHECK2-NEXT: store i32 [[TMP3]], i32* [[ARRAYIDX1]], align 4, !dbg [[DBG139:![0-9]+]] -// CHECK2-NEXT: ret void, !dbg [[DBG137]] +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* @global, align 4, !dbg [[DBG134:![0-9]+]] +// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i64 1, !dbg [[DBG135:![0-9]+]] +// CHECK2-NEXT: store i32 [[TMP6]], i32* [[ARRAYIDX1]], align 4, !dbg [[DBG136:![0-9]+]] +// CHECK2-NEXT: ret void, !dbg [[DBG134]] // CHECK2: terminate.lpad: -// CHECK2-NEXT: [[TMP4:%.*]] = landingpad { i8*, i32 } -// CHECK2-NEXT: catch i8* null, !dbg [[DBG135]] -// CHECK2-NEXT: [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0, !dbg [[DBG135]] -// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP5]]) #[[ATTR7]], !dbg [[DBG135]] -// CHECK2-NEXT: unreachable, !dbg [[DBG135]] -// -// -// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] !dbg [[DBG140:![0-9]+]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META141:![0-9]+]], metadata !DIExpression()), !dbg [[DBG142:![0-9]+]] -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META143:![0-9]+]], metadata !DIExpression()), !dbg [[DBG142]] -// CHECK2-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG142]] -// CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META145:![0-9]+]], metadata !DIExpression()), !dbg [[DBG142]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG146:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG146]] -// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG146]] -// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG146]] -// CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG146]] -// CHECK2-NEXT: call void @.omp_outlined._debug__.6(i32* [[TMP2]], i32* [[TMP3]], i64 [[TMP0]], i32* [[TMP4]]) #[[ATTR6]], !dbg [[DBG146]] -// CHECK2-NEXT: ret void, !dbg [[DBG146]] -// -// -// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] !dbg [[DBG147:![0-9]+]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META148:![0-9]+]], metadata !DIExpression()), !dbg [[DBG149:![0-9]+]] -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META150:![0-9]+]], metadata !DIExpression()), !dbg [[DBG149]] -// CHECK2-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META151:![0-9]+]], metadata !DIExpression()), !dbg [[DBG149]] -// CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META152:![0-9]+]], metadata !DIExpression()), !dbg [[DBG149]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG153:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG153]] -// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG153]] -// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG153]] -// CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG153]] -// CHECK2-NEXT: call void @.omp_outlined._debug__.5(i32* [[TMP2]], i32* [[TMP3]], i64 [[TMP0]], i32* [[TMP4]]) #[[ATTR6]], !dbg [[DBG153]] -// CHECK2-NEXT: ret void, !dbg [[DBG153]] +// CHECK2-NEXT: [[TMP7:%.*]] = landingpad { i8*, i32 } +// CHECK2-NEXT: catch i8* null, !dbg [[DBG133]] +// CHECK2-NEXT: [[TMP8:%.*]] = extractvalue { i8*, i32 } [[TMP7]], 0, !dbg [[DBG133]] +// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP8]]) #[[ATTR7]], !dbg [[DBG133]] +// CHECK2-NEXT: unreachable, !dbg [[DBG133]] // // // CHECK2-LABEL: define {{[^@]+}}@_Z5tmainIPPcEiT_ -// CHECK2-SAME: (i8** noundef [[ARGC:%.*]]) #[[ATTR4]] comdat !dbg [[DBG154:![0-9]+]] { +// CHECK2-SAME: (i8** noundef [[ARGC:%.*]]) #[[ATTR4]] comdat !dbg [[DBG137:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK2-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i8*** [[ARGC_ADDR]], metadata [[META159:![0-9]+]], metadata !DIExpression()), !dbg [[DBG160:![0-9]+]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 8, !dbg [[DBG161:![0-9]+]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP0]], i64 0, !dbg [[DBG161]] -// CHECK2-NEXT: [[TMP1:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8, !dbg [[DBG161]] -// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i64 0, !dbg [[DBG161]] -// CHECK2-NEXT: [[TMP2:%.*]] = load i8, i8* [[ARRAYIDX1]], align 1, !dbg [[DBG161]] -// CHECK2-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i64, !dbg [[DBG162:![0-9]+]] -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB11:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i64)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i64 [[TMP3]]), !dbg [[DBG163:![0-9]+]] -// CHECK2-NEXT: ret i32 0, !dbg [[DBG164:![0-9]+]] -// -// -// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined._debug__.9 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8*** noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG165:![0-9]+]] { +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i8*** [[ARGC_ADDR]], metadata [[META142:![0-9]+]], metadata !DIExpression()), !dbg [[DBG143:![0-9]+]] +// CHECK2-NEXT: [[TMP0:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 8, !dbg [[DBG144:![0-9]+]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP0]], i64 0, !dbg [[DBG144]] +// CHECK2-NEXT: [[TMP1:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8, !dbg [[DBG144]] +// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i64 0, !dbg [[DBG144]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i8, i8* [[ARRAYIDX1]], align 1, !dbg [[DBG144]] +// CHECK2-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i64, !dbg [[DBG145:![0-9]+]] +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG146:![0-9]+]] +// CHECK2-NEXT: store i8*** [[ARGC_ADDR]], i8**** [[TMP4]], align 8, !dbg [[DBG146]] +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG146]] +// CHECK2-NEXT: store i64 [[TMP3]], i64* [[TMP5]], align 8, !dbg [[DBG146]] +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB11:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG146]] +// CHECK2-NEXT: ret i32 0, !dbg [[DBG147:![0-9]+]] +// +// +// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..5 +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG148:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK2-NEXT: [[VAR:%.*]] = alloca double*, align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META169:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META155:![0-9]+]], metadata !DIExpression()), !dbg [[DBG156:![0-9]+]] // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META171:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170]] -// CHECK2-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i8**** [[ARGC_ADDR]], metadata [[META172:![0-9]+]], metadata !DIExpression()), !dbg [[DBG173:![0-9]+]] -// CHECK2-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META174:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 8, !dbg [[DBG175:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG175]] -// CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[TMP0]], align 8, !dbg [[DBG176:![0-9]+]] -// CHECK2-NEXT: invoke void @_Z3fooIPPcEvT_(i8** noundef [[TMP2]]) -// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG178:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META157:![0-9]+]], metadata !DIExpression()), !dbg [[DBG156]] +// CHECK2-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata %struct.anon.4** [[__CONTEXT_ADDR]], metadata [[META158:![0-9]+]], metadata !DIExpression()), !dbg [[DBG156]] +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8, !dbg [[DBG159:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0, !dbg [[DBG159]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i8***, i8**** [[TMP1]], align 8, !dbg [[DBG159]] +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1, !dbg [[DBG159]] +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8, !dbg [[DBG159]] +// CHECK2-NEXT: [[TMP5:%.*]] = load i8**, i8*** [[TMP2]], align 8, !dbg [[DBG160:![0-9]+]] +// CHECK2-NEXT: invoke void @_Z3fooIPPcEvT_(i8** noundef [[TMP5]]) +// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG162:![0-9]+]] // CHECK2: invoke.cont: -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata double** [[VAR]], metadata [[META179:![0-9]+]], metadata !DIExpression()), !dbg [[DBG186:![0-9]+]] -// CHECK2-NEXT: [[TMP3:%.*]] = load double*, double** [[VAR]], align 8, !dbg [[DBG187:![0-9]+]] -// CHECK2-NEXT: [[TMP4:%.*]] = mul nsw i64 0, [[TMP1]], !dbg [[DBG187]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP3]], i64 [[TMP4]], !dbg [[DBG187]] -// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX]], i64 0, !dbg [[DBG187]] -// CHECK2-NEXT: ret void, !dbg [[DBG188:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata double** [[VAR]], metadata [[META163:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170:![0-9]+]] +// CHECK2-NEXT: [[TMP6:%.*]] = load double*, double** [[VAR]], align 8, !dbg [[DBG171:![0-9]+]] +// CHECK2-NEXT: [[TMP7:%.*]] = mul nsw i64 0, [[TMP4]], !dbg [[DBG171]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP7]], !dbg [[DBG171]] +// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX]], i64 0, !dbg [[DBG171]] +// CHECK2-NEXT: ret void, !dbg [[DBG172:![0-9]+]] // CHECK2: terminate.lpad: -// CHECK2-NEXT: [[TMP5:%.*]] = landingpad { i8*, i32 } -// CHECK2-NEXT: catch i8* null, !dbg [[DBG178]] -// CHECK2-NEXT: [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 0, !dbg [[DBG178]] -// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP6]]) #[[ATTR7]], !dbg [[DBG178]] -// CHECK2-NEXT: unreachable, !dbg [[DBG178]] +// CHECK2-NEXT: [[TMP8:%.*]] = landingpad { i8*, i32 } +// CHECK2-NEXT: catch i8* null, !dbg [[DBG162]] +// CHECK2-NEXT: [[TMP9:%.*]] = extractvalue { i8*, i32 } [[TMP8]], 0, !dbg [[DBG162]] +// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP9]]) #[[ATTR7]], !dbg [[DBG162]] +// CHECK2-NEXT: unreachable, !dbg [[DBG162]] // // // CHECK2-LABEL: define {{[^@]+}}@_Z3fooIPPcEvT_ -// CHECK2-SAME: (i8** noundef [[ARGC:%.*]]) #[[ATTR4]] comdat !dbg [[DBG189:![0-9]+]] { +// CHECK2-SAME: (i8** noundef [[ARGC:%.*]]) #[[ATTR4]] comdat !dbg [[DBG173:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 8 // CHECK2-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i8*** [[ARGC_ADDR]], metadata [[META192:![0-9]+]], metadata !DIExpression()), !dbg [[DBG193:![0-9]+]] -// CHECK2-NEXT: ret void, !dbg [[DBG194:![0-9]+]] -// -// -// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8*** noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR3]] !dbg [[DBG195:![0-9]+]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META196:![0-9]+]], metadata !DIExpression()), !dbg [[DBG197:![0-9]+]] -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META198:![0-9]+]], metadata !DIExpression()), !dbg [[DBG197]] -// CHECK2-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i8**** [[ARGC_ADDR]], metadata [[META199:![0-9]+]], metadata !DIExpression()), !dbg [[DBG197]] -// CHECK2-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META200:![0-9]+]], metadata !DIExpression()), !dbg [[DBG197]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 8, !dbg [[DBG201:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG201]] -// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG201]] -// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG201]] -// CHECK2-NEXT: [[TMP4:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 8, !dbg [[DBG201]] -// CHECK2-NEXT: call void @.omp_outlined._debug__.9(i32* [[TMP2]], i32* [[TMP3]], i8*** [[TMP4]], i64 [[TMP1]]) #[[ATTR6]], !dbg [[DBG201]] -// CHECK2-NEXT: ret void, !dbg [[DBG201]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i8*** [[ARGC_ADDR]], metadata [[META176:![0-9]+]], metadata !DIExpression()), !dbg [[DBG177:![0-9]+]] +// CHECK2-NEXT: ret void, !dbg [[DBG178:![0-9]+]] // // // CHECK3-LABEL: define {{[^@]+}}@main @@ -765,6 +687,8 @@ // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* @global, align 4 // CHECK3-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[LOADGEP_VLA]], i64 1 // CHECK3-NEXT: store i32 [[TMP3]], i32* [[ARRAYIDX1]], align 4 +// CHECK3-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] +// CHECK3: omp.par.region.parallel.after: // CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK3: omp.par.pre_finalize: // CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] @@ -830,6 +754,8 @@ // CHECK3-NEXT: [[TMP5:%.*]] = mul nsw i64 0, [[TMP2]] // CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 [[TMP5]] // CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX2]], i64 0 +// CHECK3-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] +// CHECK3: omp.par.region.parallel.after: // CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK3: omp.par.pre_finalize: // CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] @@ -903,7 +829,9 @@ // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* @global, align 4, !dbg [[DBG35]] // CHECK4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[LOADGEP_VLA]], i64 1, !dbg [[DBG35]] // CHECK4-NEXT: store i32 [[TMP3]], i32* [[ARRAYIDX1]], align 4, !dbg [[DBG35]] -// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]], !dbg [[DBG35]] +// CHECK4-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG35]] +// CHECK4: omp.par.region.parallel.after: +// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK4: omp.par.pre_finalize: // CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG35]] // CHECK4: omp.par.outlined.exit.exitStub: @@ -971,7 +899,9 @@ // CHECK4-NEXT: [[TMP5:%.*]] = mul nsw i64 0, [[TMP2]], !dbg [[DBG66]] // CHECK4-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 [[TMP5]], !dbg [[DBG66]] // CHECK4-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX2]], i64 0, !dbg [[DBG66]] -// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]], !dbg [[DBG67:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG67:![0-9]+]] +// CHECK4: omp.par.region.parallel.after: +// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK4: omp.par.pre_finalize: // CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG67]] // CHECK4: omp.par.outlined.exit.exitStub: diff --git a/clang/test/OpenMP/parallel_copyin_codegen.cpp b/clang/test/OpenMP/parallel_copyin_codegen.cpp --- a/clang/test/OpenMP/parallel_copyin_codegen.cpp +++ b/clang/test/OpenMP/parallel_copyin_codegen.cpp @@ -215,6 +215,8 @@ // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_6:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) @@ -251,10 +253,10 @@ // CHECK1-NEXT: call void @__cxa_guard_release(i64* @_ZGVZ4mainE3var) #[[ATTR3]] // CHECK1-NEXT: br label [[INIT_END5]] // CHECK1: init.end5: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*)) -// CHECK1-NEXT: [[CALL6:%.*]] = call noundef i32 @_Z5tmainIiET_v() -// CHECK1-NEXT: store i32 [[CALL6]], i32* [[RETVAL]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_6]]) +// CHECK1-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z5tmainIiET_v() +// CHECK1-NEXT: store i32 [[CALL7]], i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR3]] // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK1-NEXT: ret i32 [[TMP8]] @@ -378,31 +380,34 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* bitcast (i32* @_ZZ4mainE5t_var to i8*), i64 4, i8*** @_ZZ4mainE5t_var.cache.) -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32* -// CHECK1-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[TMP3]] to i64 -// CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i64 ptrtoint (i32* @_ZZ4mainE5t_var to i64), [[TMP4]] -// CHECK1-NEXT: br i1 [[TMP5]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i32* @_ZZ4mainE5t_var to i8*), i64 4, i8*** @_ZZ4mainE5t_var.cache.) +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i32* +// CHECK1-NEXT: [[TMP5:%.*]] = ptrtoint i32* [[TMP4]] to i64 +// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i64 ptrtoint (i32* @_ZZ4mainE5t_var to i64), [[TMP5]] +// CHECK1-NEXT: br i1 [[TMP6]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK1: copyin.not.master: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* @_ZZ4mainE5t_var, align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* bitcast ([2 x i32]* @_ZZ4mainE3vec to i8*), i64 8, i8*** @_ZZ4mainE3vec.cache.) -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to [2 x i32]* -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [2 x i32]* [[TMP8]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP9]], i8* align 4 bitcast ([2 x i32]* @_ZZ4mainE3vec to i8*), i64 8, i1 false) -// CHECK1-NEXT: [[TMP10:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* bitcast ([2 x %struct.S]* @_ZZ4mainE5s_arr to i8*), i64 8, i8*** @_ZZ4mainE5s_arr.cache.) -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to [2 x %struct.S]* -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP11]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP12]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* @_ZZ4mainE5t_var, align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast ([2 x i32]* @_ZZ4mainE3vec to i8*), i64 8, i8*** @_ZZ4mainE3vec.cache.) +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to [2 x i32]* +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast [2 x i32]* [[TMP9]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP10]], i8* align 4 bitcast ([2 x i32]* @_ZZ4mainE3vec to i8*), i64 8, i1 false) +// CHECK1-NEXT: [[TMP11:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast ([2 x %struct.S]* @_ZZ4mainE5s_arr to i8*), i64 8, i8*** @_ZZ4mainE5s_arr.cache.) +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP11]] to [2 x %struct.S]* +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP12]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP13]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i32 0, i32 0), [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] @@ -410,68 +415,73 @@ // CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP13]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done1: -// CHECK1-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* bitcast (%struct.S* @_ZZ4mainE3var to i8*), i64 4, i8*** @_ZZ4mainE3var.cache.) -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to %struct.S* -// CHECK1-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP14]], %struct.S* noundef nonnull align 4 dereferenceable(4) @_ZZ4mainE3var) +// CHECK1-NEXT: [[TMP14:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (%struct.S* @_ZZ4mainE3var to i8*), i64 4, i8*** @_ZZ4mainE3var.cache.) +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to %struct.S* +// CHECK1-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP15]], %struct.S* noundef nonnull align 4 dereferenceable(4) @_ZZ4mainE3var) // CHECK1-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK1: copyin.not.master.end: -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* bitcast (i32* @_ZZ4mainE5t_var to i8*), i64 4, i8*** @_ZZ4mainE5t_var.cache.) -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to i32* -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* bitcast ([2 x i32]* @_ZZ4mainE3vec to i8*), i64 8, i8*** @_ZZ4mainE3vec.cache.) -// CHECK1-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP18]] to [2 x i32]* -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP19]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* bitcast (%struct.S* @_ZZ4mainE3var to i8*), i64 4, i8*** @_ZZ4mainE3var.cache.) -// CHECK1-NEXT: [[TMP21:%.*]] = bitcast i8* [[TMP20]] to %struct.S* -// CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* bitcast ([2 x %struct.S]* @_ZZ4mainE5s_arr to i8*), i64 8, i8*** @_ZZ4mainE5s_arr.cache.) -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i8* [[TMP22]] to [2 x %struct.S]* -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP23]], i64 0, i64 0 -// CHECK1-NEXT: [[CALL4:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX3]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP21]]) +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i32* @_ZZ4mainE5t_var to i8*), i64 4, i8*** @_ZZ4mainE5t_var.cache.) +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to i32* +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast ([2 x i32]* @_ZZ4mainE3vec to i8*), i64 8, i8*** @_ZZ4mainE3vec.cache.) +// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to [2 x i32]* +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP20]], i64 0, i64 0 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (%struct.S* @_ZZ4mainE3var to i8*), i64 4, i8*** @_ZZ4mainE3var.cache.) +// CHECK1-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP21]] to %struct.S* +// CHECK1-NEXT: [[TMP23:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast ([2 x %struct.S]* @_ZZ4mainE5s_arr to i8*), i64 8, i8*** @_ZZ4mainE5s_arr.cache.) +// CHECK1-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP23]] to [2 x %struct.S]* +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP24]], i64 0, i64 0 +// CHECK1-NEXT: [[CALL4:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX3]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP22]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* bitcast (i32* @_ZZ4mainE5t_var to i8*), i64 4, i8*** @_ZZ4mainE5t_var.cache.) -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32* -// CHECK1-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[TMP3]] to i64 -// CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i64 ptrtoint (i32* @_ZZ4mainE5t_var to i64), [[TMP4]] -// CHECK1-NEXT: br i1 [[TMP5]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i32* @_ZZ4mainE5t_var to i8*), i64 4, i8*** @_ZZ4mainE5t_var.cache.) +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i32* +// CHECK1-NEXT: [[TMP5:%.*]] = ptrtoint i32* [[TMP4]] to i64 +// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i64 ptrtoint (i32* @_ZZ4mainE5t_var to i64), [[TMP5]] +// CHECK1-NEXT: br i1 [[TMP6]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK1: copyin.not.master: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* @_ZZ4mainE5t_var, align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* @_ZZ4mainE5t_var, align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[TMP4]], align 4 // CHECK1-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK1: copyin.not.master.end: -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP7:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* bitcast (i32* @_ZZ4mainE5t_var to i8*), i64 4, i8*** @_ZZ4mainE5t_var.cache.) -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32* -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP8]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP8:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i32* @_ZZ4mainE5t_var to i8*), i64 4, i8*** @_ZZ4mainE5t_var.cache.) +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32* +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP9]], align 4 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_Z5tmainIiET_v // CHECK1-SAME: () #[[ATTR2]] comdat { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_0]], align 4 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEaSERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR3]] +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_1]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_6:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.1* @_ZN1SIiEaSERKS0_(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR3]] // CHECK1-NEXT: [[TMP0:%.*]] = load atomic i8, i8* bitcast (i64* @_ZGVZ5tmainIiET_vE5s_arr to i8*) acquire, align 8 // CHECK1-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0 // CHECK1-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF3]] @@ -481,9 +491,9 @@ // CHECK1-NEXT: br i1 [[TOBOOL]], label [[INIT:%.*]], label [[INIT_END]] // CHECK1: init: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-NEXT: call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast ([2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr to i8*), i8* (i8*)* @.__kmpc_global_ctor_..4, i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_..5) -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i64 0, i64 0), i32 noundef 1) -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i64 0, i64 1), i32 noundef 2) +// CHECK1-NEXT: call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast ([2 x %struct.S.1]* @_ZZ5tmainIiET_vE5s_arr to i8*), i8* (i8*)* @.__kmpc_global_ctor_..4, i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_..5) +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x %struct.S.1], [2 x %struct.S.1]* @_ZZ5tmainIiET_vE5s_arr, i64 0, i64 0), i32 noundef 1) +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x %struct.S.1], [2 x %struct.S.1]* @_ZZ5tmainIiET_vE5s_arr, i64 0, i64 1), i32 noundef 2) // CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__cxa_atexit(void (i8*)* @__cxx_global_array_dtor.6, i8* null, i8* @__dso_handle) #[[ATTR3]] // CHECK1-NEXT: call void @__cxa_guard_release(i64* @_ZGVZ5tmainIiET_vE5s_arr) #[[ATTR3]] // CHECK1-NEXT: br label [[INIT_END]] @@ -497,15 +507,15 @@ // CHECK1-NEXT: br i1 [[TOBOOL3]], label [[INIT4:%.*]], label [[INIT_END5]] // CHECK1: init4: // CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-NEXT: call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast (%struct.S.0* @_ZZ5tmainIiET_vE3var to i8*), i8* (i8*)* @.__kmpc_global_ctor_..7, i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_..8) -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) @_ZZ5tmainIiET_vE3var, i32 noundef 3) -// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S.0*)* @_ZN1SIiED1Ev to void (i8*)*), i8* bitcast (%struct.S.0* @_ZZ5tmainIiET_vE3var to i8*), i8* @__dso_handle) #[[ATTR3]] +// CHECK1-NEXT: call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast (%struct.S.1* @_ZZ5tmainIiET_vE3var to i8*), i8* (i8*)* @.__kmpc_global_ctor_..7, i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_..8) +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) @_ZZ5tmainIiET_vE3var, i32 noundef 3) +// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S.1*)* @_ZN1SIiED1Ev to void (i8*)*), i8* bitcast (%struct.S.1* @_ZZ5tmainIiET_vE3var to i8*), i8* @__dso_handle) #[[ATTR3]] // CHECK1-NEXT: call void @__cxa_guard_release(i64* @_ZGVZ5tmainIiET_vE3var) #[[ATTR3]] // CHECK1-NEXT: br label [[INIT_END5]] // CHECK1: init.end5: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*)) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*)) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR3]] +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_6]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR3]] // CHECK1-NEXT: ret i32 0 // // @@ -555,33 +565,33 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEaSERKS0_ -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP0]], %struct.S.0** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: ret %struct.S.0* [[THIS1]] +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.1* [[TMP0]], %struct.S.1** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: ret %struct.S.1* [[THIS1]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR3]] +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR3]] // CHECK1-NEXT: ret void // // @@ -591,25 +601,25 @@ // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to [2 x %struct.S.0]* -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to [2 x %struct.S.1]* +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 // CHECK1-NEXT: ret i8* [[TMP3]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) // CHECK1-NEXT: ret void // // @@ -619,14 +629,14 @@ // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = bitcast i8* [[TMP1]] to %struct.S.0* -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = bitcast i8* [[TMP1]] to %struct.S.1* +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: // CHECK1-NEXT: ret void @@ -639,10 +649,10 @@ // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i32 0, i32 0) +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ getelementptr inbounds ([2 x %struct.S.1], [2 x %struct.S.1]* @_ZZ5tmainIiET_vE5s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S.1], [2 x %struct.S.1]* @_ZZ5tmainIiET_vE5s_arr, i32 0, i32 0) // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: // CHECK1-NEXT: ret void @@ -654,8 +664,8 @@ // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.S.0* -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP2]], i32 noundef 3) +// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.S.1* +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP2]], i32 noundef 3) // CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 // CHECK1-NEXT: ret i8* [[TMP3]] // @@ -666,100 +676,106 @@ // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.S.0* -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP2]]) #[[ATTR3]] +// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.S.1* +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP2]]) #[[ATTR3]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* bitcast (i32* @_ZZ5tmainIiET_vE5t_var to i8*), i64 4, i8*** @_ZZ5tmainIiET_vE5t_var.cache.) -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32* -// CHECK1-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[TMP3]] to i64 -// CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i64 ptrtoint (i32* @_ZZ5tmainIiET_vE5t_var to i64), [[TMP4]] -// CHECK1-NEXT: br i1 [[TMP5]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i32* @_ZZ5tmainIiET_vE5t_var to i8*), i64 4, i8*** @_ZZ5tmainIiET_vE5t_var.cache.) +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i32* +// CHECK1-NEXT: [[TMP5:%.*]] = ptrtoint i32* [[TMP4]] to i64 +// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i64 ptrtoint (i32* @_ZZ5tmainIiET_vE5t_var to i64), [[TMP5]] +// CHECK1-NEXT: br i1 [[TMP6]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK1: copyin.not.master: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* @_ZZ5tmainIiET_vE5t_var, align 128 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[TMP3]], align 128 -// CHECK1-NEXT: [[TMP7:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* bitcast ([2 x i32]* @_ZZ5tmainIiET_vE3vec to i8*), i64 8, i8*** @_ZZ5tmainIiET_vE3vec.cache.) -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to [2 x i32]* -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [2 x i32]* [[TMP8]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP9]], i8* align 128 bitcast ([2 x i32]* @_ZZ5tmainIiET_vE3vec to i8*), i64 8, i1 false) -// CHECK1-NEXT: [[TMP10:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* bitcast ([2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr to i8*), i64 8, i8*** @_ZZ5tmainIiET_vE5s_arr.cache.) -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to [2 x %struct.S.0]* -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP11]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP12]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* @_ZZ5tmainIiET_vE5t_var, align 128 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[TMP4]], align 128 +// CHECK1-NEXT: [[TMP8:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast ([2 x i32]* @_ZZ5tmainIiET_vE3vec to i8*), i64 8, i8*** @_ZZ5tmainIiET_vE3vec.cache.) +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to [2 x i32]* +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast [2 x i32]* [[TMP9]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP10]], i8* align 128 bitcast ([2 x i32]* @_ZZ5tmainIiET_vE3vec to i8*), i64 8, i1 false) +// CHECK1-NEXT: [[TMP11:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast ([2 x %struct.S.1]* @_ZZ5tmainIiET_vE5s_arr to i8*), i64 8, i8*** @_ZZ5tmainIiET_vE5s_arr.cache.) +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP11]] to [2 x %struct.S.1]* +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP12]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr [[STRUCT_S_1:%.*]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN]], [[TMP13]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i32 0, i32 0), [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEaSERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ getelementptr inbounds ([2 x %struct.S.1], [2 x %struct.S.1]* @_ZZ5tmainIiET_vE5s_arr, i32 0, i32 0), [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.1* @_ZN1SIiEaSERKS0_(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP13]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done1: -// CHECK1-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* bitcast (%struct.S.0* @_ZZ5tmainIiET_vE3var to i8*), i64 4, i8*** @_ZZ5tmainIiET_vE3var.cache.) -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to %struct.S.0* -// CHECK1-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEaSERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP14]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) @_ZZ5tmainIiET_vE3var) +// CHECK1-NEXT: [[TMP14:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (%struct.S.1* @_ZZ5tmainIiET_vE3var to i8*), i64 4, i8*** @_ZZ5tmainIiET_vE3var.cache.) +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to %struct.S.1* +// CHECK1-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.1* @_ZN1SIiEaSERKS0_(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP15]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) @_ZZ5tmainIiET_vE3var) // CHECK1-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK1: copyin.not.master.end: -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* bitcast (i32* @_ZZ5tmainIiET_vE5t_var to i8*), i64 4, i8*** @_ZZ5tmainIiET_vE5t_var.cache.) -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to i32* -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 128 -// CHECK1-NEXT: [[TMP18:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* bitcast ([2 x i32]* @_ZZ5tmainIiET_vE3vec to i8*), i64 8, i8*** @_ZZ5tmainIiET_vE3vec.cache.) -// CHECK1-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP18]] to [2 x i32]* -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP19]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 128 -// CHECK1-NEXT: [[TMP20:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* bitcast (%struct.S.0* @_ZZ5tmainIiET_vE3var to i8*), i64 4, i8*** @_ZZ5tmainIiET_vE3var.cache.) -// CHECK1-NEXT: [[TMP21:%.*]] = bitcast i8* [[TMP20]] to %struct.S.0* -// CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* bitcast ([2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr to i8*), i64 8, i8*** @_ZZ5tmainIiET_vE5s_arr.cache.) -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i8* [[TMP22]] to [2 x %struct.S.0]* -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP23]], i64 0, i64 0 -// CHECK1-NEXT: [[CALL4:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEaSERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX3]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP21]]) +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i32* @_ZZ5tmainIiET_vE5t_var to i8*), i64 4, i8*** @_ZZ5tmainIiET_vE5t_var.cache.) +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to i32* +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 128 +// CHECK1-NEXT: [[TMP19:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast ([2 x i32]* @_ZZ5tmainIiET_vE3vec to i8*), i64 8, i8*** @_ZZ5tmainIiET_vE3vec.cache.) +// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to [2 x i32]* +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP20]], i64 0, i64 0 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 128 +// CHECK1-NEXT: [[TMP21:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (%struct.S.1* @_ZZ5tmainIiET_vE3var to i8*), i64 4, i8*** @_ZZ5tmainIiET_vE3var.cache.) +// CHECK1-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP21]] to %struct.S.1* +// CHECK1-NEXT: [[TMP23:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast ([2 x %struct.S.1]* @_ZZ5tmainIiET_vE5s_arr to i8*), i64 8, i8*** @_ZZ5tmainIiET_vE5s_arr.cache.) +// CHECK1-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP23]] to [2 x %struct.S.1]* +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP24]], i64 0, i64 0 +// CHECK1-NEXT: [[CALL4:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.1* @_ZN1SIiEaSERKS0_(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX3]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP22]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* bitcast (i32* @_ZZ5tmainIiET_vE5t_var to i8*), i64 4, i8*** @_ZZ5tmainIiET_vE5t_var.cache.) -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32* -// CHECK1-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[TMP3]] to i64 -// CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i64 ptrtoint (i32* @_ZZ5tmainIiET_vE5t_var to i64), [[TMP4]] -// CHECK1-NEXT: br i1 [[TMP5]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i32* @_ZZ5tmainIiET_vE5t_var to i8*), i64 4, i8*** @_ZZ5tmainIiET_vE5t_var.cache.) +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i32* +// CHECK1-NEXT: [[TMP5:%.*]] = ptrtoint i32* [[TMP4]] to i64 +// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i64 ptrtoint (i32* @_ZZ5tmainIiET_vE5t_var to i64), [[TMP5]] +// CHECK1-NEXT: br i1 [[TMP6]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK1: copyin.not.master: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* @_ZZ5tmainIiET_vE5t_var, align 128 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[TMP3]], align 128 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* @_ZZ5tmainIiET_vE5t_var, align 128 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[TMP4]], align 128 // CHECK1-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK1: copyin.not.master.end: -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast (i32* @g to i8*), i64 4, i8*** @g.cache.) // CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* // CHECK1-NEXT: [[TMP3:%.*]] = load volatile i32, i32* [[TMP2]], align 128 @@ -768,24 +784,24 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* bitcast (i32* @g to i8*), i64 4, i8*** @g.cache.) // CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32* @@ -806,29 +822,32 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 1 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i8* bitcast (i32* @g to i8*), i64 4, i8*** @g.cache.) -// CHECK3-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32* -// CHECK3-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[TMP3]] to i64 -// CHECK3-NEXT: [[TMP5:%.*]] = icmp ne i64 ptrtoint (i32* @g to i64), [[TMP4]] -// CHECK3-NEXT: br i1 [[TMP5]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i8* bitcast (i32* @g to i8*), i64 4, i8*** @g.cache.) +// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i32* +// CHECK3-NEXT: [[TMP5:%.*]] = ptrtoint i32* [[TMP4]] to i64 +// CHECK3-NEXT: [[TMP6:%.*]] = icmp ne i64 ptrtoint (i32* @g to i64), [[TMP5]] +// CHECK3-NEXT: br i1 [[TMP6]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK3: copyin.not.master: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* @g, align 128 -// CHECK3-NEXT: store volatile i32 [[TMP6]], i32* [[TMP3]], align 128 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* @g, align 128 +// CHECK3-NEXT: store volatile i32 [[TMP7]], i32* [[TMP4]], align 128 // CHECK3-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK3: copyin.not.master.end: -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP7:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* bitcast (i32* @g to i8*), i64 4, i8*** @g.cache.) -// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32* -// CHECK3-NEXT: store volatile i32 1, i32* [[TMP8]], align 128 +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP8:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i32* @g to i8*), i64 4, i8*** @g.cache.) +// CHECK3-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32* +// CHECK3-NEXT: store volatile i32 1, i32* [[TMP9]], align 128 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 1 dereferenceable(1) [[REF_TMP]]) // CHECK3-NEXT: ret void // @@ -849,39 +868,43 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>*, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* // CHECK4-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>** [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* bitcast (i32* @g to i8*), i64 4, i8*** @g.cache.) -// CHECK4-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32* -// CHECK4-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[TMP3]] to i64 -// CHECK4-NEXT: [[TMP5:%.*]] = icmp ne i64 ptrtoint (i32* @g to i64), [[TMP4]] -// CHECK4-NEXT: br i1 [[TMP5]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK4-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i32* @g to i8*), i64 4, i8*** @g.cache.) +// CHECK4-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i32* +// CHECK4-NEXT: [[TMP5:%.*]] = ptrtoint i32* [[TMP4]] to i64 +// CHECK4-NEXT: [[TMP6:%.*]] = icmp ne i64 ptrtoint (i32* @g to i64), [[TMP5]] +// CHECK4-NEXT: br i1 [[TMP6]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK4: copyin.not.master: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* @g, align 128 -// CHECK4-NEXT: store volatile i32 [[TMP6]], i32* [[TMP3]], align 128 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* @g, align 128 +// CHECK4-NEXT: store volatile i32 [[TMP7]], i32* [[TMP4]], align 128 // CHECK4-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK4: copyin.not.master.end: -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP7:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* bitcast (i32* @g to i8*), i64 4, i8*** @g.cache.) -// CHECK4-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32* -// CHECK4-NEXT: store volatile i32 1, i32* [[TMP8]], align 128 -// CHECK4-NEXT: [[TMP9:%.*]] = load i8*, i8** getelementptr inbounds ([[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global.2 to %struct.__block_literal_generic*), i32 0, i32 3), align 8 -// CHECK4-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to void (i8*)* -// CHECK4-NEXT: call void [[TMP10]](i8* noundef bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global.2 to i8*)) +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK4-NEXT: [[TMP8:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i32* @g to i8*), i64 4, i8*** @g.cache.) +// CHECK4-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32* +// CHECK4-NEXT: store volatile i32 1, i32* [[TMP9]], align 128 +// CHECK4-NEXT: [[TMP10:%.*]] = load i8*, i8** getelementptr inbounds ([[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global.2 to %struct.__block_literal_generic*), i32 0, i32 3), align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to void (i8*)* +// CHECK4-NEXT: call void [[TMP11]](i8* noundef bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global.2 to i8*)) // CHECK4-NEXT: ret void // // @@ -903,6 +926,7 @@ // CHECK5-LABEL: define {{[^@]+}}@_Z10array_funcv // CHECK5-SAME: () #[[ATTR0:[0-9]+]] { // CHECK5-NEXT: entry: +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK5-NEXT: [[TMP0:%.*]] = load atomic i8, i8* bitcast (i64* @_ZGVZ10array_funcvE1s to i8*) acquire, align 8 // CHECK5-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0 // CHECK5-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF3:![0-9]+]] @@ -925,7 +949,7 @@ // CHECK5-NEXT: call void @__cxa_guard_release(i64* @_ZGVZ10array_funcvE1s) #[[ATTR1]] // CHECK5-NEXT: br label [[INIT_END]] // CHECK5: init.end: -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // @@ -1006,27 +1030,30 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* bitcast ([2 x i32]* @_ZZ10array_funcvE1a to i8*), i64 8, i8*** @_ZZ10array_funcvE1a.cache.) -// CHECK5-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [2 x i32]* -// CHECK5-NEXT: [[TMP4:%.*]] = ptrtoint [2 x i32]* [[TMP3]] to i64 -// CHECK5-NEXT: [[TMP5:%.*]] = icmp ne i64 ptrtoint ([2 x i32]* @_ZZ10array_funcvE1a to i64), [[TMP4]] -// CHECK5-NEXT: br i1 [[TMP5]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK5-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast ([2 x i32]* @_ZZ10array_funcvE1a to i8*), i64 8, i8*** @_ZZ10array_funcvE1a.cache.) +// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i32]* +// CHECK5-NEXT: [[TMP5:%.*]] = ptrtoint [2 x i32]* [[TMP4]] to i64 +// CHECK5-NEXT: [[TMP6:%.*]] = icmp ne i64 ptrtoint ([2 x i32]* @_ZZ10array_funcvE1a to i64), [[TMP5]] +// CHECK5-NEXT: br i1 [[TMP6]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK5: copyin.not.master: -// CHECK5-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP3]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP6]], i8* align 4 bitcast ([2 x i32]* @_ZZ10array_funcvE1a to i8*), i64 8, i1 false) -// CHECK5-NEXT: [[TMP7:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* bitcast ([2 x %struct.St]* @_ZZ10array_funcvE1s to i8*), i64 16, i8*** @_ZZ10array_funcvE1s.cache.) -// CHECK5-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to [2 x %struct.St]* -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.St], [2 x %struct.St]* [[TMP8]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_ST:%.*]], %struct.St* [[ARRAY_BEGIN]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.St* [[ARRAY_BEGIN]], [[TMP9]] +// CHECK5-NEXT: [[TMP7:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP7]], i8* align 4 bitcast ([2 x i32]* @_ZZ10array_funcvE1a to i8*), i64 8, i1 false) +// CHECK5-NEXT: [[TMP8:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast ([2 x %struct.St]* @_ZZ10array_funcvE1s to i8*), i64 16, i8*** @_ZZ10array_funcvE1s.cache.) +// CHECK5-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to [2 x %struct.St]* +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.St], [2 x %struct.St]* [[TMP9]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr [[STRUCT_ST:%.*]], %struct.St* [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.St* [[ARRAY_BEGIN]], [[TMP10]] // CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: // CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.St* [ getelementptr inbounds ([2 x %struct.St], [2 x %struct.St]* @_ZZ10array_funcvE1s, i32 0, i32 0), [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] @@ -1034,12 +1061,12 @@ // CHECK5-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(8) %struct.St* @_ZN2StaSERKS_(%struct.St* noundef nonnull align 4 dereferenceable(8) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.St* noundef nonnull align 4 dereferenceable(8) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_ST]], %struct.St* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_ST]], %struct.St* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.St* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.St* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP10]] // CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] // CHECK5: omp.arraycpy.done1: // CHECK5-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK5: copyin.not.master.end: -// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // @@ -1082,6 +1109,8 @@ // CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK11-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_4:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK11-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) @@ -1106,13 +1135,23 @@ // CHECK11-NEXT: store i8 1, i8* @_ZGVZ4mainE3var, align 1 // CHECK11-NEXT: br label [[INIT_END3]] // CHECK11: init.end3: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S]*, %struct.S*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* @_ZZ4mainE5t_var, [2 x i32]* @_ZZ4mainE3vec, [2 x %struct.S]* @_ZZ4mainE5s_arr, %struct.S* @_ZZ4mainE3var) -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* @_ZZ4mainE5t_var) -// CHECK11-NEXT: [[CALL4:%.*]] = call noundef i32 @_Z5tmainIiET_v() -// CHECK11-NEXT: store i32 [[CALL4]], i32* [[RETVAL]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* @_ZZ4mainE5t_var, i32** [[TMP4]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store [2 x i32]* @_ZZ4mainE3vec, [2 x i32]** [[TMP5]], align 8 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x %struct.S]* @_ZZ4mainE5s_arr, [2 x %struct.S]** [[TMP6]], align 8 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store %struct.S* @_ZZ4mainE3var, %struct.S** [[TMP7]], align 8 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_4]], i32 0, i32 0 +// CHECK11-NEXT: store i32* @_ZZ4mainE5t_var, i32** [[TMP8]], align 8 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_4]]) +// CHECK11-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z5tmainIiET_v() +// CHECK11-NEXT: store i32 [[CALL5]], i32* [[RETVAL]], align 4 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK11-NEXT: ret i32 [[TMP4]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK11-NEXT: ret i32 [[TMP9]] // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ -1176,36 +1215,35 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK11-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[TMP0]] to i64 -// CHECK11-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], ptrtoint (i32* @_ZZ4mainE5t_var to i64) -// CHECK11-NEXT: br i1 [[TMP5]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK11-NEXT: [[TMP9:%.*]] = ptrtoint i32* [[TMP2]] to i64 +// CHECK11-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], ptrtoint (i32* @_ZZ4mainE5t_var to i64) +// CHECK11-NEXT: br i1 [[TMP10]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK11: copyin.not.master: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* @_ZZ4mainE5t_var, align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 bitcast ([2 x i32]* @_ZZ4mainE3vec to i8*), i8* align 4 [[TMP7]], i64 8, i1 false) -// CHECK11-NEXT: [[TMP8:%.*]] = bitcast [2 x %struct.S]* [[TMP2]] to %struct.S* +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* @_ZZ4mainE5t_var, align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 bitcast ([2 x i32]* @_ZZ4mainE3vec to i8*), i8* align 4 [[TMP12]], i64 8, i1 false) +// CHECK11-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S]* [[TMP6]] to %struct.S* // CHECK11-NEXT: br i1 icmp eq (%struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i32 0, i32 0), %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i64 1, i64 0)), label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP8]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP13]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i32 0, i32 0), [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 @@ -1213,41 +1251,43 @@ // CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i64 1, i64 0) // CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] // CHECK11: omp.arraycpy.done1: -// CHECK11-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) @_ZZ4mainE3var, %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP3]]) +// CHECK11-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) @_ZZ4mainE3var, %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP8]]) // CHECK11-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK11: copyin.not.master.end: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP10]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* @_ZZ4mainE5t_var, align 4 -// CHECK11-NEXT: store i32 [[TMP11]], i32* getelementptr inbounds ([2 x i32], [2 x i32]* @_ZZ4mainE3vec, i64 0, i64 0), align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP15]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* @_ZZ4mainE5t_var, align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* getelementptr inbounds ([2 x i32], [2 x i32]* @_ZZ4mainE3vec, i64 0, i64 0), align 4 // CHECK11-NEXT: [[CALL3:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i64 0, i64 0), %struct.S* noundef nonnull align 4 dereferenceable(4) @_ZZ4mainE3var) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR5]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = ptrtoint i32* [[TMP0]] to i64 -// CHECK11-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], ptrtoint (i32* @_ZZ4mainE5t_var to i64) -// CHECK11-NEXT: br i1 [[TMP2]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = ptrtoint i32* [[TMP2]] to i64 +// CHECK11-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP3]], ptrtoint (i32* @_ZZ4mainE5t_var to i64) +// CHECK11-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK11: copyin.not.master: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* @_ZZ4mainE5t_var, align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* @_ZZ4mainE5t_var, align 4 // CHECK11-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK11: copyin.not.master.end: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK11-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* @_ZZ4mainE5t_var, align 4 -// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* @_ZZ4mainE5t_var, align 4 +// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK11-NEXT: store i32 [[INC]], i32* @_ZZ4mainE5t_var, align 4 // CHECK11-NEXT: ret void // @@ -1255,18 +1295,20 @@ // CHECK11-LABEL: define {{[^@]+}}@_Z5tmainIiET_v // CHECK11-SAME: () #[[ATTR2]] comdat { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_0]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) -// CHECK11-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEaSERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR4]] +// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK11-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_1]], align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_4:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) +// CHECK11-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.1* @_ZN1SIiEaSERKS0_(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR4]] // CHECK11-NEXT: [[TMP0:%.*]] = load i8, i8* bitcast (i64* @_ZGVZ5tmainIiET_vE5s_arr to i8*), align 8 // CHECK11-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0 // CHECK11-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF3]] // CHECK11: init.check: -// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i64 0, i64 0), i32 noundef 1) -// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i64 0, i64 1), i32 noundef 2) +// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x %struct.S.1], [2 x %struct.S.1]* @_ZZ5tmainIiET_vE5s_arr, i64 0, i64 0), i32 noundef 1) +// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x %struct.S.1], [2 x %struct.S.1]* @_ZZ5tmainIiET_vE5s_arr, i64 0, i64 1), i32 noundef 2) // CHECK11-NEXT: [[TMP1:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* @__cxx_global_array_dtor.2, i8* null, i8* @__dso_handle) #[[ATTR4]] // CHECK11-NEXT: store i8 1, i8* bitcast (i64* @_ZGVZ5tmainIiET_vE5s_arr to i8*), align 8 // CHECK11-NEXT: br label [[INIT_END]] @@ -1275,14 +1317,24 @@ // CHECK11-NEXT: [[GUARD_UNINITIALIZED1:%.*]] = icmp eq i8 [[TMP2]], 0 // CHECK11-NEXT: br i1 [[GUARD_UNINITIALIZED1]], label [[INIT_CHECK2:%.*]], label [[INIT_END3:%.*]], !prof [[PROF3]] // CHECK11: init.check2: -// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) @_ZZ5tmainIiET_vE3var, i32 noundef 3) -// CHECK11-NEXT: [[TMP3:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* bitcast (void (%struct.S.0*)* @_ZN1SIiED1Ev to void (i8*)*), i8* bitcast (%struct.S.0* @_ZZ5tmainIiET_vE3var to i8*), i8* @__dso_handle) #[[ATTR4]] +// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) @_ZZ5tmainIiET_vE3var, i32 noundef 3) +// CHECK11-NEXT: [[TMP3:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* bitcast (void (%struct.S.1*)* @_ZN1SIiED1Ev to void (i8*)*), i8* bitcast (%struct.S.1* @_ZZ5tmainIiET_vE3var to i8*), i8* @__dso_handle) #[[ATTR4]] // CHECK11-NEXT: store i8 1, i8* bitcast (i64* @_ZGVZ5tmainIiET_vE3var to i8*), align 8 // CHECK11-NEXT: br label [[INIT_END3]] // CHECK11: init.end3: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* @_ZZ5tmainIiET_vE5t_var, [2 x i32]* @_ZZ5tmainIiET_vE3vec, [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, %struct.S.0* @_ZZ5tmainIiET_vE3var) -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* @_ZZ5tmainIiET_vE5t_var) -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* @_ZZ5tmainIiET_vE5t_var, i32** [[TMP4]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store [2 x i32]* @_ZZ5tmainIiET_vE3vec, [2 x i32]** [[TMP5]], align 8 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x %struct.S.1]* @_ZZ5tmainIiET_vE5s_arr, [2 x %struct.S.1]** [[TMP6]], align 8 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store %struct.S.1* @_ZZ5tmainIiET_vE3var, %struct.S.1** [[TMP7]], align 8 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_4]], i32 0, i32 0 +// CHECK11-NEXT: store i32* @_ZZ5tmainIiET_vE5t_var, i32** [[TMP8]], align 8 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_4]]) +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK11-NEXT: ret i32 0 // // @@ -1326,46 +1378,46 @@ // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK11-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK11-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEaSERKS0_ -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK11-NEXT: [[DOTADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK11-NEXT: store %struct.S.0* [[TMP0]], %struct.S.0** [[DOTADDR]], align 8 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK11-NEXT: ret %struct.S.0* [[THIS1]] +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK11-NEXT: [[DOTADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK11-NEXT: store %struct.S.1* [[TMP0]], %struct.S.1** [[DOTADDR]], align 8 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK11-NEXT: ret %struct.S.1* [[THIS1]] // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK11-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK11-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK11-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) // CHECK11-NEXT: ret void // // @@ -1376,119 +1428,120 @@ // CHECK11-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i32 0, i32 0) +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ getelementptr inbounds ([2 x %struct.S.1], [2 x %struct.S.1]* @_ZZ5tmainIiET_vE5s_arr, i64 1, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x %struct.S.1], [2 x %struct.S.1]* @_ZZ5tmainIiET_vE5s_arr, i32 0, i32 0) // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK11: arraydestroy.done1: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR5]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[TMP0]] to i64 -// CHECK11-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], ptrtoint (i32* @_ZZ5tmainIiET_vE5t_var to i64) -// CHECK11-NEXT: br i1 [[TMP5]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP5]], align 8 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP7]], align 8 +// CHECK11-NEXT: [[TMP9:%.*]] = ptrtoint i32* [[TMP2]] to i64 +// CHECK11-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], ptrtoint (i32* @_ZZ5tmainIiET_vE5t_var to i64) +// CHECK11-NEXT: br i1 [[TMP10]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK11: copyin.not.master: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP0]], align 128 -// CHECK11-NEXT: store i32 [[TMP6]], i32* @_ZZ5tmainIiET_vE5t_var, align 128 -// CHECK11-NEXT: [[TMP7:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 bitcast ([2 x i32]* @_ZZ5tmainIiET_vE3vec to i8*), i8* align 128 [[TMP7]], i64 8, i1 false) -// CHECK11-NEXT: [[TMP8:%.*]] = bitcast [2 x %struct.S.0]* [[TMP2]] to %struct.S.0* -// CHECK11-NEXT: br i1 icmp eq (%struct.S.0* getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i32 0, i32 0), %struct.S.0* getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i64 1, i64 0)), label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 128 +// CHECK11-NEXT: store i32 [[TMP11]], i32* @_ZZ5tmainIiET_vE5t_var, align 128 +// CHECK11-NEXT: [[TMP12:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 bitcast ([2 x i32]* @_ZZ5tmainIiET_vE3vec to i8*), i8* align 128 [[TMP12]], i64 8, i1 false) +// CHECK11-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S.1]* [[TMP6]] to %struct.S.1* +// CHECK11-NEXT: br i1 icmp eq (%struct.S.1* getelementptr inbounds ([2 x %struct.S.1], [2 x %struct.S.1]* @_ZZ5tmainIiET_vE5s_arr, i32 0, i32 0), %struct.S.1* getelementptr inbounds ([2 x %struct.S.1], [2 x %struct.S.1]* @_ZZ5tmainIiET_vE5s_arr, i64 1, i64 0)), label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP8]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i32 0, i32 0), [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEaSERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0:%.*]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i64 1, i64 0) +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP13]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ getelementptr inbounds ([2 x %struct.S.1], [2 x %struct.S.1]* @_ZZ5tmainIiET_vE5s_arr, i32 0, i32 0), [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.1* @_ZN1SIiEaSERKS0_(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1:%.*]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], getelementptr inbounds ([2 x %struct.S.1], [2 x %struct.S.1]* @_ZZ5tmainIiET_vE5s_arr, i64 1, i64 0) // CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] // CHECK11: omp.arraycpy.done1: -// CHECK11-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEaSERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) @_ZZ5tmainIiET_vE3var, %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP3]]) +// CHECK11-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.1* @_ZN1SIiEaSERKS0_(%struct.S.1* noundef nonnull align 4 dereferenceable(4) @_ZZ5tmainIiET_vE3var, %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP8]]) // CHECK11-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK11: copyin.not.master.end: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* @_ZZ5tmainIiET_vE5t_var, align 128 -// CHECK11-NEXT: store i32 [[TMP11]], i32* getelementptr inbounds ([2 x i32], [2 x i32]* @_ZZ5tmainIiET_vE3vec, i64 0, i64 0), align 128 -// CHECK11-NEXT: [[CALL3:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEaSERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i64 0, i64 0), %struct.S.0* noundef nonnull align 4 dereferenceable(4) @_ZZ5tmainIiET_vE3var) +// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* @_ZZ5tmainIiET_vE5t_var, align 128 +// CHECK11-NEXT: store i32 [[TMP16]], i32* getelementptr inbounds ([2 x i32], [2 x i32]* @_ZZ5tmainIiET_vE3vec, i64 0, i64 0), align 128 +// CHECK11-NEXT: [[CALL3:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.1* @_ZN1SIiEaSERKS0_(%struct.S.1* noundef nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x %struct.S.1], [2 x %struct.S.1]* @_ZZ5tmainIiET_vE5s_arr, i64 0, i64 0), %struct.S.1* noundef nonnull align 4 dereferenceable(4) @_ZZ5tmainIiET_vE3var) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR5]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = ptrtoint i32* [[TMP0]] to i64 -// CHECK11-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], ptrtoint (i32* @_ZZ5tmainIiET_vE5t_var to i64) -// CHECK11-NEXT: br i1 [[TMP2]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK11-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = ptrtoint i32* [[TMP2]] to i64 +// CHECK11-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP3]], ptrtoint (i32* @_ZZ5tmainIiET_vE5t_var to i64) +// CHECK11-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK11: copyin.not.master: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 128 -// CHECK11-NEXT: store i32 [[TMP3]], i32* @_ZZ5tmainIiET_vE5t_var, align 128 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 128 +// CHECK11-NEXT: store i32 [[TMP5]], i32* @_ZZ5tmainIiET_vE5t_var, align 128 // CHECK11-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK11: copyin.not.master.end: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK11-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load volatile i32, i32* @g, align 128 // CHECK11-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load volatile i32, i32* @g, align 128 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1512,27 +1565,29 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[G:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK13-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 1 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[G_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = ptrtoint i32* [[TMP0]] to i64 -// CHECK13-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], ptrtoint (i32* @g to i64) -// CHECK13-NEXT: br i1 [[TMP2]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK13-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = ptrtoint i32* [[TMP2]] to i64 +// CHECK13-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP3]], ptrtoint (i32* @g to i64) +// CHECK13-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK13: copyin.not.master: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 128 -// CHECK13-NEXT: store volatile i32 [[TMP3]], i32* @g, align 128 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 128 +// CHECK13-NEXT: store volatile i32 [[TMP5]], i32* @g, align 128 // CHECK13-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK13: copyin.not.master.end: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK13-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP5]]) +// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK13-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]]) // CHECK13-NEXT: store volatile i32 1, i32* @g, align 128 // CHECK13-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 1 dereferenceable(1) [[REF_TMP]]) // CHECK13-NEXT: ret void @@ -1559,38 +1614,43 @@ // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 // CHECK14-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>*, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK14-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK14-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* // CHECK14-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>** [[BLOCK_ADDR]], align 8 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* @g) +// CHECK14-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store i32* @g, i32** [[TMP0]], align 8 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK14-NEXT: ret void // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[G:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK14-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load i32*, i32** [[G_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = ptrtoint i32* [[TMP0]] to i64 -// CHECK14-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], ptrtoint (i32* @g to i64) -// CHECK14-NEXT: br i1 [[TMP2]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK14-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = ptrtoint i32* [[TMP2]] to i64 +// CHECK14-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP3]], ptrtoint (i32* @g to i64) +// CHECK14-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK14: copyin.not.master: -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 128 -// CHECK14-NEXT: store volatile i32 [[TMP3]], i32* @g, align 128 +// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 128 +// CHECK14-NEXT: store volatile i32 [[TMP5]], i32* @g, align 128 // CHECK14-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK14: copyin.not.master.end: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK14-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP5]]) +// CHECK14-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK14-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]]) // CHECK14-NEXT: store volatile i32 1, i32* @g, align 128 -// CHECK14-NEXT: [[TMP6:%.*]] = load i8*, i8** getelementptr inbounds ([[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global.2 to %struct.__block_literal_generic*), i32 0, i32 3), align 8 -// CHECK14-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to void (i8*)* -// CHECK14-NEXT: call void [[TMP7]](i8* noundef bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global.2 to i8*)) +// CHECK14-NEXT: [[TMP8:%.*]] = load i8*, i8** getelementptr inbounds ([[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global.2 to %struct.__block_literal_generic*), i32 0, i32 3), align 8 +// CHECK14-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to void (i8*)* +// CHECK14-NEXT: call void [[TMP9]](i8* noundef bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global.2 to i8*)) // CHECK14-NEXT: ret void // // @@ -1614,6 +1674,7 @@ // CHECK15-LABEL: define {{[^@]+}}@_Z10array_funcv // CHECK15-SAME: () #[[ATTR0:[0-9]+]] { // CHECK15-NEXT: entry: +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK15-NEXT: [[TMP0:%.*]] = load i8, i8* @_ZGVZ10array_funcvE1s, align 1 // CHECK15-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0 // CHECK15-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF3:![0-9]+]] @@ -1630,7 +1691,11 @@ // CHECK15-NEXT: store i8 1, i8* @_ZGVZ10array_funcvE1s, align 1 // CHECK15-NEXT: br label [[INIT_END]] // CHECK15: init.end: -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, [2 x %struct.St]*)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* @_ZZ10array_funcvE1a, [2 x %struct.St]* @_ZZ10array_funcvE1s) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store [2 x i32]* @_ZZ10array_funcvE1a, [2 x i32]** [[TMP2]], align 8 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store [2 x %struct.St]* @_ZZ10array_funcvE1s, [2 x %struct.St]** [[TMP3]], align 8 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // @@ -1671,28 +1736,29 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[A:%.*]], [2 x %struct.St]* noundef nonnull align 4 dereferenceable(16) [[S:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK15-NEXT: [[S_ADDR:%.*]] = alloca [2 x %struct.St]*, align 8 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK15-NEXT: store [2 x i32]* [[A]], [2 x i32]** [[A_ADDR]], align 8 -// CHECK15-NEXT: store [2 x %struct.St]* [[S]], [2 x %struct.St]** [[S_ADDR]], align 8 -// CHECK15-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[A_ADDR]], align 8 -// CHECK15-NEXT: [[TMP1:%.*]] = load [2 x %struct.St]*, [2 x %struct.St]** [[S_ADDR]], align 8 -// CHECK15-NEXT: [[TMP2:%.*]] = ptrtoint [2 x i32]* [[TMP0]] to i64 -// CHECK15-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP2]], ptrtoint ([2 x i32]* @_ZZ10array_funcvE1a to i64) -// CHECK15-NEXT: br i1 [[TMP3]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK15-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load [2 x %struct.St]*, [2 x %struct.St]** [[TMP3]], align 8 +// CHECK15-NEXT: [[TMP5:%.*]] = ptrtoint [2 x i32]* [[TMP2]] to i64 +// CHECK15-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP5]], ptrtoint ([2 x i32]* @_ZZ10array_funcvE1a to i64) +// CHECK15-NEXT: br i1 [[TMP6]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK15: copyin.not.master: -// CHECK15-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 bitcast ([2 x i32]* @_ZZ10array_funcvE1a to i8*), i8* align 4 [[TMP4]], i64 8, i1 false) -// CHECK15-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.St]* [[TMP1]] to %struct.St* +// CHECK15-NEXT: [[TMP7:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 bitcast ([2 x i32]* @_ZZ10array_funcvE1a to i8*), i8* align 4 [[TMP7]], i64 8, i1 false) +// CHECK15-NEXT: [[TMP8:%.*]] = bitcast [2 x %struct.St]* [[TMP4]] to %struct.St* // CHECK15-NEXT: br i1 icmp eq (%struct.St* getelementptr inbounds ([2 x %struct.St], [2 x %struct.St]* @_ZZ10array_funcvE1s, i32 0, i32 0), %struct.St* getelementptr inbounds ([2 x %struct.St], [2 x %struct.St]* @_ZZ10array_funcvE1s, i64 1, i64 0)), label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK15: omp.arraycpy.body: -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.St* [ [[TMP5]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.St* [ [[TMP8]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.St* [ getelementptr inbounds ([2 x %struct.St], [2 x %struct.St]* @_ZZ10array_funcvE1s, i32 0, i32 0), [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(8) %struct.St* @_ZN2StaSERKS_(%struct.St* noundef nonnull align 4 dereferenceable(8) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.St* noundef nonnull align 4 dereferenceable(8) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_ST:%.*]], %struct.St* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 @@ -1702,9 +1768,9 @@ // CHECK15: omp.arraycpy.done1: // CHECK15-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK15: copyin.not.master.end: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]]) +// CHECK15-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK15-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP10]]) // CHECK15-NEXT: ret void // // @@ -1752,49 +1818,58 @@ // CHECK16-LABEL: define {{[^@]+}}@_Z3foov // CHECK16-SAME: () #[[ATTR2:[0-9]+]] { // CHECK16-NEXT: entry: -// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK16-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK16-NEXT: ret void // // // CHECK16-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK16-NEXT: entry: // CHECK16-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK16-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK16-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK16-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK16-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK16-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK16-NEXT: [[TMP0:%.*]] = call i32* @_ZTW1t() -// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[TMP0]]) +// CHECK16-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK16-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK16-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK16-NEXT: [[TMP2:%.*]] = call i32* @_ZTW1t() +// CHECK16-NEXT: store i32* [[TMP2]], i32** [[TMP1]], align 8 +// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK16-NEXT: ret void // // // CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T:%.*]]) #[[ATTR3]] { +// CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK16-NEXT: entry: // CHECK16-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK16-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK16-NEXT: [[T_ADDR:%.*]] = alloca i32*, align 8 +// CHECK16-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK16-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK16-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK16-NEXT: store i32* [[T]], i32** [[T_ADDR]], align 8 -// CHECK16-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_ADDR]], align 8 -// CHECK16-NEXT: [[TMP1:%.*]] = call i32* @_ZTW1t() -// CHECK16-NEXT: [[TMP2:%.*]] = ptrtoint i32* [[TMP0]] to i64 -// CHECK16-NEXT: [[TMP3:%.*]] = ptrtoint i32* [[TMP1]] to i64 -// CHECK16-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]] -// CHECK16-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK16-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK16-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK16-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK16-NEXT: [[TMP3:%.*]] = call i32* @_ZTW1t() +// CHECK16-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[TMP2]] to i64 +// CHECK16-NEXT: [[TMP5:%.*]] = ptrtoint i32* [[TMP3]] to i64 +// CHECK16-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP4]], [[TMP5]] +// CHECK16-NEXT: br i1 [[TMP6]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK16: copyin.not.master: -// CHECK16-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK16-NEXT: store i32 [[TMP5]], i32* [[TMP1]], align 4 +// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK16-NEXT: store i32 [[TMP7]], i32* [[TMP3]], align 4 // CHECK16-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK16: copyin.not.master.end: -// CHECK16-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK16-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]]) -// CHECK16-NEXT: [[TMP8:%.*]] = call i32* @_ZTW1t() +// CHECK16-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK16-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK16-NEXT: store i32 [[INC]], i32* [[TMP8]], align 4 +// CHECK16-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP9]]) +// CHECK16-NEXT: [[TMP10:%.*]] = call i32* @_ZTW1t() +// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK16-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK16-NEXT: store i32 [[INC]], i32* [[TMP10]], align 4 // CHECK16-NEXT: ret void // // diff --git a/clang/test/OpenMP/parallel_firstprivate_codegen.cpp b/clang/test/OpenMP/parallel_firstprivate_codegen.cpp --- a/clang/test/OpenMP/parallel_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/parallel_firstprivate_codegen.cpp @@ -249,10 +249,9 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 4 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR_CASTED1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN2SSC1ERi(%struct.SS* nonnull align 4 dereferenceable(28) [[SS]], i32* nonnull align 4 dereferenceable(4) @_ZZ4mainE5sivar) // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[TEST]]) @@ -264,34 +263,40 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYINIT_BEGIN]], i32 1 // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float 2.000000e+00) // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* nonnull align 4 dereferenceable(4) [[VAR]], float 3.000000e+00) -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4 -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[SIVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[VEC]], i32 [[TMP2]], [2 x %struct.S]* [[S_ARR]], %struct.S* [[VAR]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, i32* [[A]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[T_VAR_CASTED1]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[T_VAR_CASTED1]], align 4 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]]) // CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z5tmainIiET_v() // CHECK1-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP10]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done2: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP8]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP11]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2SSC1ERi @@ -331,71 +336,75 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [2 x i32]* nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 [[T_VAR:%.*]], [2 x %struct.S]* nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 [[SIVAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[SIVAR]], i32* [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC1]] to i8* -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i32 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S]* [[TMP6]] to %struct.S* +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done3: -// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* nonnull align 4 dereferenceable(4) [[VAR4]], %struct.S* nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC1]], i32 0, i32 0 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast %struct.S* [[ARRAYIDX6]] to i8* -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.S* [[VAR4]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 4, i1 false) -// CHECK1-NEXT: store i32 2, i32* [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i32 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* nonnull align 4 dereferenceable(4) [[TMP8]], %struct.St* [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 0 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast %struct.S* [[ARRAYIDX3]] to i8* +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP16]], i8* align 4 [[TMP17]], i32 4, i1 false) +// CHECK1-NEXT: store i32 2, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN4]], i32 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP10]], [[OMP_ARRAYCPY_DONE3]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP18]], [[OMP_ARRAYCPY_DONE1]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done8: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done5: // CHECK1-NEXT: ret void // // @@ -443,23 +452,28 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK1-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[DOTT_VAR__VOID_ADDR:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP1]], i32 4, i8* inttoptr (i32 1 to i8*)) +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: [[DOTT_VAR__VOID_ADDR:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP4]], i32 4, i8* inttoptr (i32 1 to i8*)) // CHECK1-NEXT: [[DOTT_VAR__ADDR:%.*]] = bitcast i8* [[DOTT_VAR__VOID_ADDR]] to i32* -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTT_VAR__ADDR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTT_VAR__ADDR]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTT_VAR__ADDR]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTT_VAR__ADDR]] to i8* -// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP1]], i8* [[TMP3]], i8* inttoptr (i32 1 to i8*)) +// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i32* [[DOTT_VAR__ADDR]] to i8* +// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP4]], i8* [[TMP6]], i8* inttoptr (i32 1 to i8*)) // CHECK1-NEXT: ret void // // @@ -467,39 +481,51 @@ // CHECK1-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK1-NEXT: [[SST:%.*]] = alloca [[STRUCT_SST:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 128 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 128 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2]], align 128 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.2* nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: call void @_ZN3SSTIiEC1Ev(%struct.SST* nonnull align 4 dereferenceable(4) [[SST]]) // CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 128 // CHECK1-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 128 [[TMP0]], i8* align 128 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i32 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i32 1 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 2) -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* nonnull align 4 dereferenceable(4) [[VAR]], i32 3) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), [2 x i32]* [[VEC]], i32* [[T_VAR]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[VAR]]) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* [[T_VAR]]) +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.2* nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 1) +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYINIT_BEGIN]], i32 1 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.2* nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 2) +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.2* nonnull align 4 dereferenceable(4) [[VAR]], i32 3) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x %struct.S.2]* [[S_ARR]], [2 x %struct.S.2]** [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store %struct.S.2* [[VAR]], %struct.S.2** [[TMP4]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[TMP5]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_1]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN]], i32 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done1: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP2]] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done2: +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP7]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2SSC2ERi @@ -511,9 +537,7 @@ // CHECK1-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[C7:%.*]] = alloca i32*, align 4 // CHECK1-NEXT: [[E:%.*]] = alloca [4 x i32]*, align 4 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[C_CASTED:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK1-NEXT: store i32* [[D]], i32** [[D_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 @@ -540,67 +564,78 @@ // CHECK1-NEXT: store i32* [[TMP1]], i32** [[C7]], align 4 // CHECK1-NEXT: [[E9:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 3 // CHECK1-NEXT: store [4 x i32]* [[E9]], [4 x i32]** [[E]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[B4]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[B_CASTED]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[C7]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[C_CASTED]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[C_CASTED]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load [4 x i32]*, [4 x i32]** [[E]], align 4 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i32, i32, i32, [4 x i32]*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.SS* [[THIS1]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP9]], [4 x i32]* [[TMP10]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[THIS1]], %struct.SS** [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A2]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[B4]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[C7]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load [4 x i32]*, [4 x i32]** [[E]], align 4 +// CHECK1-NEXT: store [4 x i32]* [[TMP12]], [4 x i32]** [[TMP11]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.SS* [[THIS:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], [4 x i32]* nonnull align 4 dereferenceable(16) [[E:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[E_ADDR:%.*]] = alloca [4 x i32]*, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32*, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32*, align 4 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca [4 x i32]*, align 4 -// CHECK1-NEXT: [[E3:%.*]] = alloca [4 x i32], align 4 -// CHECK1-NEXT: [[_TMP4:%.*]] = alloca [4 x i32]*, align 4 +// CHECK1-NEXT: [[E:%.*]] = alloca [4 x i32], align 4 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca [4 x i32]*, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[C]], i32* [[C_ADDR]], align 4 -// CHECK1-NEXT: store [4 x i32]* [[E]], [4 x i32]** [[E_ADDR]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load [4 x i32]*, [4 x i32]** [[E_ADDR]], align 4 -// CHECK1-NEXT: store i32* [[A_ADDR]], i32** [[TMP]], align 4 -// CHECK1-NEXT: store i32* [[C_ADDR]], i32** [[_TMP1]], align 4 -// CHECK1-NEXT: store [4 x i32]* [[TMP1]], [4 x i32]** [[_TMP2]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load [4 x i32]*, [4 x i32]** [[_TMP2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast [4 x i32]* [[E3]] to i8* -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast [4 x i32]* [[TMP2]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i32 16, i1 false) -// CHECK1-NEXT: store [4 x i32]* [[E3]], [4 x i32]** [[_TMP4]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP]], align 4 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 -// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP5]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP7]], -1 -// CHECK1-NEXT: store i32 [[DEC]], i32* [[B_ADDR]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[_TMP1]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[DIV]], i32* [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load [4 x i32]*, [4 x i32]** [[_TMP4]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* [[TMP10]], i32 0, i32 2 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[B]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[C]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load [4 x i32]*, [4 x i32]** [[TMP9]], align 4 +// CHECK1-NEXT: store i32* [[A]], i32** [[TMP]], align 4 +// CHECK1-NEXT: store i32* [[C]], i32** [[_TMP1]], align 4 +// CHECK1-NEXT: store [4 x i32]* [[TMP10]], [4 x i32]** [[_TMP2]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load [4 x i32]*, [4 x i32]** [[_TMP2]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast [4 x i32]* [[E]] to i8* +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast [4 x i32]* [[TMP11]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 16, i1 false) +// CHECK1-NEXT: store [4 x i32]* [[E]], [4 x i32]** [[_TMP3]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[TMP]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP14]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[B]], align 4 +// CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP16]], -1 +// CHECK1-NEXT: store i32 [[DEC]], i32* [[B]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[_TMP1]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP18]], 1 +// CHECK1-NEXT: store i32 [[DIV]], i32* [[TMP17]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load [4 x i32]*, [4 x i32]** [[_TMP3]], align 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* [[TMP19]], i32 0, i32 2 // CHECK1-NEXT: store i32 1111, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: ret void // @@ -687,12 +722,12 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK1-SAME: (%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 4 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 4 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 4 +// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.2* nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK1-NEXT: ret void // // @@ -707,134 +742,135 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK1-SAME: (%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 4 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 4 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS1]], i32 [[TMP0]]) +// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.2* nonnull align 4 dereferenceable(4) [[THIS1]], i32 [[TMP0]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [2 x i32]* nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S.0]* nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 128 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 128 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 128 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 128 +// CHECK1-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP1]], align 128 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[T_VAR1]], align 128 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 128 [[TMP5]], i8* align 128 [[TMP6]], i32 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S.0]* [[TMP2]] to %struct.S.0* -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.2]*, [2 x %struct.S.2]** [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP4]], align 128 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[T_VAR]], align 128 +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 128 [[TMP10]], i8* align 128 [[TMP11]], i32 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast [2 x %struct.S.2]* [[TMP6]] to %struct.S.2* +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN]], i32 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.2* [[ARRAY_BEGIN]], [[TMP13]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP12]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.2* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* [[AGG_TMP]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.2* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.2* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S.0* nonnull align 4 dereferenceable(4) [[TMP3]], %struct.St* [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR1]], align 128 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 0 -// CHECK1-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX]], align 128 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct.S.0* [[ARRAYIDX7]] to i8* -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.S.0* [[VAR5]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 128 [[TMP10]], i8* align 128 [[TMP11]], i32 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN8]], i32 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.2* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP13]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.2* nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.2* nonnull align 4 dereferenceable(4) [[TMP8]], %struct.St* [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR]], align 128 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 0 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 128 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast %struct.S.2* [[ARRAYIDX3]] to i8* +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast %struct.S.2* [[VAR]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 128 [[TMP15]], i8* align 128 [[TMP16]], i32 4, i1 false) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN4]], i32 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP12]], [[OMP_ARRAYCPY_DONE4]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done9: +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP17]], [[OMP_ARRAYCPY_DONE1]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done5: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1ERKS0_2St -// CHECK1-SAME: (%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.2* nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK1-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 4 -// CHECK1-NEXT: call void @_ZN1SIiEC2ERKS0_2St(%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.0* nonnull align 4 dereferenceable(4) [[TMP0]], %struct.St* [[T]]) +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 4 +// CHECK1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.2*, align 4 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 4 +// CHECK1-NEXT: store %struct.S.2* [[S]], %struct.S.2** [[S_ADDR]], align 4 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S.2*, %struct.S.2** [[S_ADDR]], align 4 +// CHECK1-NEXT: call void @_ZN1SIiEC2ERKS0_2St(%struct.S.2* nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.2* nonnull align 4 dereferenceable(4) [[TMP0]], %struct.St* [[T]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK1-SAME: (%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 4 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 4 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 4 +// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.2* nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 128 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[T_VAR1]], align 128 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 128 +// CHECK1-NEXT: store i32 [[TMP3]], i32* [[T_VAR]], align 128 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK1-SAME: (%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 4 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 4 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 4 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], %struct.S.2* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load volatile i32, i32* @g, align 128 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK1-NEXT: ret void @@ -845,51 +881,57 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 4 // CHECK1-NEXT: [[A2:%.*]] = alloca i32*, align 4 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK1-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 4 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], %struct.SST* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, i32* [[A]], align 4 // CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SST]], %struct.SST* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32* [[A3]], i32** [[A2]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A2]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SST*, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.SST* [[THIS1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SST* [[THIS1]], %struct.SST** [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A2]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.SST* [[THIS:%.*]], i32 [[A:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 4 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32*, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK1-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 4 -// CHECK1-NEXT: store i32* [[A_ADDR]], i32** [[TMP]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP1]], align 4 +// CHECK1-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SST*, %struct.SST** [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[A]], align 4 +// CHECK1-NEXT: store i32* [[A]], i32** [[TMP]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP5]], align 4 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK1-SAME: (%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 4 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 4 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 4 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], %struct.S.2* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load volatile i32, i32* @g, align 128 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -898,16 +940,16 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2ERKS0_2St -// CHECK1-SAME: (%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.2* nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK1-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 4 -// CHECK1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 4 +// CHECK1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.2*, align 4 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 4 +// CHECK1-NEXT: store %struct.S.2* [[S]], %struct.S.2** [[S_ADDR]], align 4 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 4 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], %struct.S.2* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S.2*, %struct.S.2** [[S_ADDR]], align 4 +// CHECK1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[TMP0]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[F2]], align 4 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.St* [[T]], i32 0, i32 0 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 @@ -917,11 +959,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK1-SAME: (%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 4 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 4 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 4 // CHECK1-NEXT: ret void // // @@ -961,9 +1003,7 @@ // CHECK3-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C7:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[E:%.*]] = alloca [4 x i32]*, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32* [[D]], i32** [[D_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 @@ -990,65 +1030,76 @@ // CHECK3-NEXT: store i32* [[TMP1]], i32** [[C7]], align 4 // CHECK3-NEXT: [[E9:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 3 // CHECK3-NEXT: store [4 x i32]* [[E9]], [4 x i32]** [[E]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[B4]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[C7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[C_CASTED]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[C_CASTED]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load [4 x i32]*, [4 x i32]** [[E]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i32, i32, i32, [4 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[THIS1]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP9]], [4 x i32]* [[TMP10]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[THIS1]], %struct.SS** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A2]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[B4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[C7]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load [4 x i32]*, [4 x i32]** [[E]], align 4 +// CHECK3-NEXT: store [4 x i32]* [[TMP12]], [4 x i32]** [[TMP11]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.SS* [[THIS:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], [4 x i32]* nonnull align 4 dereferenceable(16) [[E:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[E_ADDR:%.*]] = alloca [4 x i32]*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca [4 x i32]*, align 4 -// CHECK3-NEXT: [[E3:%.*]] = alloca [4 x i32], align 4 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca [4 x i32]*, align 4 +// CHECK3-NEXT: [[E:%.*]] = alloca [4 x i32], align 4 +// CHECK3-NEXT: [[_TMP3:%.*]] = alloca [4 x i32]*, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[C]], i32* [[C_ADDR]], align 4 -// CHECK3-NEXT: store [4 x i32]* [[E]], [4 x i32]** [[E_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [4 x i32]*, [4 x i32]** [[E_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[A_ADDR]], i32** [[TMP]], align 4 -// CHECK3-NEXT: store i32* [[C_ADDR]], i32** [[_TMP1]], align 4 -// CHECK3-NEXT: store [4 x i32]* [[TMP1]], [4 x i32]** [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load [4 x i32]*, [4 x i32]** [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = bitcast [4 x i32]* [[E3]] to i8* -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast [4 x i32]* [[TMP2]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i32 16, i1 false) -// CHECK3-NEXT: store [4 x i32]* [[E3]], [4 x i32]** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[TMP]], align 4 -// CHECK3-NEXT: store i32* [[TMP7]], i32** [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store i32* [[B_ADDR]], i32** [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP1]], align 4 -// CHECK3-NEXT: store i32* [[TMP10]], i32** [[TMP9]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[B]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[C]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load [4 x i32]*, [4 x i32]** [[TMP9]], align 4 +// CHECK3-NEXT: store i32* [[A]], i32** [[TMP]], align 4 +// CHECK3-NEXT: store i32* [[C]], i32** [[_TMP1]], align 4 +// CHECK3-NEXT: store [4 x i32]* [[TMP10]], [4 x i32]** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load [4 x i32]*, [4 x i32]** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = bitcast [4 x i32]* [[E]] to i8* +// CHECK3-NEXT: [[TMP13:%.*]] = bitcast [4 x i32]* [[TMP11]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 16, i1 false) +// CHECK3-NEXT: store [4 x i32]* [[E]], [4 x i32]** [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[TMP]], align 4 +// CHECK3-NEXT: store i32* [[TMP16]], i32** [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[B]], i32** [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32*, i32** [[_TMP1]], align 4 +// CHECK3-NEXT: store i32* [[TMP19]], i32** [[TMP18]], align 4 // CHECK3-NEXT: call void @_ZZN2SSC1ERiENKUlvE_clEv(%class.anon.0* nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: ret void // @@ -1057,9 +1108,7 @@ // CHECK3-SAME: (%class.anon.0* nonnull align 4 dereferenceable(16) [[THIS:%.*]]) #[[ATTR2:[0-9]+]] align 2 { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon.0*, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store %class.anon.0* [[THIS]], %class.anon.0** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load %class.anon.0*, %class.anon.0** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], %class.anon.0* [[THIS1]], i32 0, i32 0 @@ -1079,82 +1128,96 @@ // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[DIV]], i32* [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK3-NEXT: store i32 [[TMP21]], i32* [[C_CASTED]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[C_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i32, i32, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.SS* [[TMP1]], i32 [[TMP14]], i32 [[TMP18]], i32 [[TMP22]]) +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP1]], %struct.SS** [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32*, i32** [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32*, i32** [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], i32* [[TMP20]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.SS* [[THIS:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[C]], i32* [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[A_ADDR]], i32** [[TMP]], align 4 -// CHECK3-NEXT: store i32* [[C_ADDR]], i32** [[_TMP1]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK3-NEXT: store i32 [[INC]], i32* [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP3]], -1 -// CHECK3-NEXT: store i32 [[DEC]], i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[_TMP1]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP5]], 1 -// CHECK3-NEXT: store i32 [[DIV]], i32* [[TMP4]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[B]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[C]], align 4 +// CHECK3-NEXT: store i32* [[A]], i32** [[TMP]], align 4 +// CHECK3-NEXT: store i32* [[C]], i32** [[_TMP1]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[TMP]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: store i32 [[INC]], i32* [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[B]], align 4 +// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP11]], -1 +// CHECK3-NEXT: store i32 [[DEC]], i32* [[B]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[_TMP1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[DIV]], i32* [[TMP12]], align 4 // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[G:%.*]], i32 [[SIVAR:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G1:%.*]] = alloca i32, align 128 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca i32, align 128 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_3:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load volatile i32, i32* [[TMP0]], align 128 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[G1]], align 128 -// CHECK3-NEXT: store i32 1, i32* [[G1]], align 128 -// CHECK3-NEXT: store i32 2, i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store i32* [[G1]], i32** [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store i32* [[SIVAR_ADDR]], i32** [[TMP3]], align 4 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load volatile i32, i32* [[TMP2]], align 128 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[G]], align 128 +// CHECK3-NEXT: store i32 1, i32* [[G]], align 128 +// CHECK3-NEXT: store i32 2, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[G]], i32** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[TMP7]], align 4 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.3* nonnull align 4 dereferenceable(8) [[REF_TMP]]) // CHECK3-NEXT: ret void // // @@ -1207,35 +1270,41 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 4 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>*, align 4 -// CHECK4-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK4-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 4 // CHECK4-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* // CHECK4-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>** [[BLOCK_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4 -// CHECK4-NEXT: store i32 [[TMP0]], i32* [[SIVAR_CASTED]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* @g, i32 [[TMP1]]) +// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store i32* @g, i32** [[TMP0]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4 +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[G:%.*]], i32 [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[G1:%.*]] = alloca i32, align 128 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK4-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[G:%.*]] = alloca i32, align 128 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [104 x i8], i32 }>, align 128 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[SIVAR]], i32* [[SIVAR_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[G_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load volatile i32, i32* [[TMP0]], align 128 -// CHECK4-NEXT: store i32 [[TMP1]], i32* [[G1]], align 128 -// CHECK4-NEXT: store i32 1, i32* [[G1]], align 128 -// CHECK4-NEXT: store i32 2, i32* [[SIVAR_ADDR]], align 4 +// CHECK4-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK4-NEXT: store i32 [[TMP4]], i32* [[SIVAR]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load volatile i32, i32* [[TMP2]], align 128 +// CHECK4-NEXT: store i32 [[TMP5]], i32* [[G]], align 128 +// CHECK4-NEXT: store i32 1, i32* [[G]], align 128 +// CHECK4-NEXT: store i32 2, i32* [[SIVAR]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [104 x i8], i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [104 x i8], i32 }>* [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** [[BLOCK_ISA]], align 128 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [104 x i8], i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [104 x i8], i32 }>* [[BLOCK]], i32 0, i32 1 @@ -1247,18 +1316,18 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [104 x i8], i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [104 x i8], i32 }>* [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store %struct.__block_descriptor* bitcast ({ i32, i32, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** [[BLOCK_DESCRIPTOR]], align 16 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [104 x i8], i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [104 x i8], i32 }>* [[BLOCK]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP2:%.*]] = load volatile i32, i32* [[G1]], align 128 -// CHECK4-NEXT: store volatile i32 [[TMP2]], i32* [[BLOCK_CAPTURED]], align 128 -// CHECK4-NEXT: [[BLOCK_CAPTURED2:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [104 x i8], i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [104 x i8], i32 }>* [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP3]], i32* [[BLOCK_CAPTURED2]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [104 x i8], i32 }>* [[BLOCK]] to void ()* -// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP4]] to %struct.__block_literal_generic* -// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP6:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* -// CHECK4-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP5]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to void (i8*)* -// CHECK4-NEXT: call void [[TMP8]](i8* [[TMP6]]) +// CHECK4-NEXT: [[TMP6:%.*]] = load volatile i32, i32* [[G]], align 128 +// CHECK4-NEXT: store volatile i32 [[TMP6]], i32* [[BLOCK_CAPTURED]], align 128 +// CHECK4-NEXT: [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [104 x i8], i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [104 x i8], i32 }>* [[BLOCK]], i32 0, i32 5 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP7]], i32* [[BLOCK_CAPTURED1]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [104 x i8], i32 }>* [[BLOCK]] to void ()* +// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP8]] to %struct.__block_literal_generic* +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP10:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* +// CHECK4-NEXT: [[TMP11:%.*]] = load i8*, i8** [[TMP9]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP11]] to void (i8*)* +// CHECK4-NEXT: call void [[TMP12]](i8* [[TMP10]]) // CHECK4-NEXT: ret void // // @@ -1286,9 +1355,7 @@ // CHECK4-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[C7:%.*]] = alloca i32*, align 4 // CHECK4-NEXT: [[E:%.*]] = alloca [4 x i32]*, align 4 -// CHECK4-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[C_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK4-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK4-NEXT: store i32* [[D]], i32** [[D_ADDR]], align 4 // CHECK4-NEXT: [[THIS1:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 @@ -1315,55 +1382,66 @@ // CHECK4-NEXT: store i32* [[TMP1]], i32** [[C7]], align 4 // CHECK4-NEXT: [[E9:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 3 // CHECK4-NEXT: store [4 x i32]* [[E9]], [4 x i32]** [[E]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A2]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK4-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[B4]], align 4 -// CHECK4-NEXT: store i32 [[TMP5]], i32* [[B_CASTED]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32*, i32** [[C7]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK4-NEXT: store i32 [[TMP8]], i32* [[C_CASTED]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[C_CASTED]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load [4 x i32]*, [4 x i32]** [[E]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i32, i32, i32, [4 x i32]*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.SS* [[THIS1]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP9]], [4 x i32]* [[TMP10]]) +// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store %struct.SS* [[THIS1]], %struct.SS** [[TMP2]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A2]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK4-NEXT: store i32 [[TMP5]], i32* [[TMP3]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[B4]], align 4 +// CHECK4-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32*, i32** [[C7]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK4-NEXT: store i32 [[TMP10]], i32* [[TMP8]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load [4 x i32]*, [4 x i32]** [[E]], align 4 +// CHECK4-NEXT: store [4 x i32]* [[TMP12]], [4 x i32]** [[TMP11]], align 4 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.SS* [[THIS:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], [4 x i32]* nonnull align 4 dereferenceable(16) [[E:%.*]]) #[[ATTR2]] { +// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[E_ADDR:%.*]] = alloca [4 x i32]*, align 4 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32*, align 4 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca i32*, align 4 // CHECK4-NEXT: [[_TMP2:%.*]] = alloca [4 x i32]*, align 4 -// CHECK4-NEXT: [[E3:%.*]] = alloca [4 x i32], align 4 -// CHECK4-NEXT: [[_TMP4:%.*]] = alloca [4 x i32]*, align 4 +// CHECK4-NEXT: [[E:%.*]] = alloca [4 x i32], align 4 +// CHECK4-NEXT: [[_TMP3:%.*]] = alloca [4 x i32]*, align 4 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32, i32* }>, align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[C]], i32* [[C_ADDR]], align 4 -// CHECK4-NEXT: store [4 x i32]* [[E]], [4 x i32]** [[E_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load [4 x i32]*, [4 x i32]** [[E_ADDR]], align 4 -// CHECK4-NEXT: store i32* [[A_ADDR]], i32** [[TMP]], align 4 -// CHECK4-NEXT: store i32* [[C_ADDR]], i32** [[_TMP1]], align 4 -// CHECK4-NEXT: store [4 x i32]* [[TMP1]], [4 x i32]** [[_TMP2]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load [4 x i32]*, [4 x i32]** [[_TMP2]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = bitcast [4 x i32]* [[E3]] to i8* -// CHECK4-NEXT: [[TMP4:%.*]] = bitcast [4 x i32]* [[TMP2]] to i8* -// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i32 16, i1 false) -// CHECK4-NEXT: store [4 x i32]* [[E3]], [4 x i32]** [[_TMP4]], align 4 +// CHECK4-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK4-NEXT: store i32 [[TMP4]], i32* [[A]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], i32* [[B]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK4-NEXT: store i32 [[TMP8]], i32* [[C]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load [4 x i32]*, [4 x i32]** [[TMP9]], align 4 +// CHECK4-NEXT: store i32* [[A]], i32** [[TMP]], align 4 +// CHECK4-NEXT: store i32* [[C]], i32** [[_TMP1]], align 4 +// CHECK4-NEXT: store [4 x i32]* [[TMP10]], [4 x i32]** [[_TMP2]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load [4 x i32]*, [4 x i32]** [[_TMP2]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = bitcast [4 x i32]* [[E]] to i8* +// CHECK4-NEXT: [[TMP13:%.*]] = bitcast [4 x i32]* [[TMP11]] to i8* +// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 16, i1 false) +// CHECK4-NEXT: store [4 x i32]* [[E]], [4 x i32]** [[_TMP3]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32, i32* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32, i32* }>* [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** [[BLOCK_ISA]], align 4 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32, i32* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32, i32* }>* [[BLOCK]], i32 0, i32 1 @@ -1375,23 +1453,23 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32, i32* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32, i32* }>* [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store %struct.__block_descriptor* bitcast ({ i32, i32, i8*, i8* }* @__block_descriptor_tmp.4 to %struct.__block_descriptor*), %struct.__block_descriptor** [[BLOCK_DESCRIPTOR]], align 4 // CHECK4-NEXT: [[BLOCK_CAPTURED_THIS_ADDR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32, i32* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32, i32* }>* [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[BLOCK_CAPTURED_THIS_ADDR]], align 4 +// CHECK4-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[BLOCK_CAPTURED_THIS_ADDR]], align 4 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32, i32* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32, i32* }>* [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP]], align 4 -// CHECK4-NEXT: store i32* [[TMP5]], i32** [[BLOCK_CAPTURED]], align 4 -// CHECK4-NEXT: [[BLOCK_CAPTURED5:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32, i32* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32, i32* }>* [[BLOCK]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP6]], i32* [[BLOCK_CAPTURED5]], align 4 -// CHECK4-NEXT: [[BLOCK_CAPTURED6:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32, i32* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32, i32* }>* [[BLOCK]], i32 0, i32 8 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32*, i32** [[_TMP1]], align 4 -// CHECK4-NEXT: store i32* [[TMP7]], i32** [[BLOCK_CAPTURED6]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32, i32* }>* [[BLOCK]] to void ()* -// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP8]] to %struct.__block_literal_generic* -// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP10:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* -// CHECK4-NEXT: [[TMP11:%.*]] = load i8*, i8** [[TMP9]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP11]] to void (i8*)* -// CHECK4-NEXT: call void [[TMP12]](i8* [[TMP10]]) +// CHECK4-NEXT: [[TMP14:%.*]] = load i32*, i32** [[TMP]], align 4 +// CHECK4-NEXT: store i32* [[TMP14]], i32** [[BLOCK_CAPTURED]], align 4 +// CHECK4-NEXT: [[BLOCK_CAPTURED4:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32, i32* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32, i32* }>* [[BLOCK]], i32 0, i32 7 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[B]], align 4 +// CHECK4-NEXT: store i32 [[TMP15]], i32* [[BLOCK_CAPTURED4]], align 4 +// CHECK4-NEXT: [[BLOCK_CAPTURED5:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32, i32* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32, i32* }>* [[BLOCK]], i32 0, i32 8 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32*, i32** [[_TMP1]], align 4 +// CHECK4-NEXT: store i32* [[TMP16]], i32** [[BLOCK_CAPTURED5]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32, i32* }>* [[BLOCK]] to void ()* +// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP17]] to %struct.__block_literal_generic* +// CHECK4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP19:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* +// CHECK4-NEXT: [[TMP20:%.*]] = load i8*, i8** [[TMP18]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = bitcast i8* [[TMP20]] to void (i8*)* +// CHECK4-NEXT: call void [[TMP21]](i8* [[TMP19]]) // CHECK4-NEXT: ret void // // @@ -1400,9 +1478,7 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 4 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32, i32* }>*, align 4 -// CHECK4-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[C_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK4-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 4 // CHECK4-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32, i32* }>* // CHECK4-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32, i32* }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32, i32* }>** [[BLOCK_ADDR]], align 4 @@ -1422,55 +1498,65 @@ // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP4]], 1 // CHECK4-NEXT: store i32 [[DIV]], i32* [[TMP3]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[TMP5]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32, i32* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32, i32* }>* [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[BLOCK_CAPTURE_ADDR3]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK4-NEXT: store i32 [[TMP6]], i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[A_CASTED]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32*, i32** [[BLOCK_CAPTURE_ADDR3]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK4-NEXT: store i32 [[TMP8]], i32* [[TMP6]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR4:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32, i32* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32, i32* }>* [[BLOCK]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[BLOCK_CAPTURE_ADDR4]], align 4 -// CHECK4-NEXT: store i32 [[TMP8]], i32* [[B_CASTED]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[B_CASTED]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[BLOCK_CAPTURE_ADDR4]], align 4 +// CHECK4-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR5:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32, i32* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32, i32* }>* [[BLOCK]], i32 0, i32 8 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32*, i32** [[BLOCK_CAPTURE_ADDR5]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK4-NEXT: store i32 [[TMP11]], i32* [[C_CASTED]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[C_CASTED]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i32, i32, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.SS* [[THIS]], i32 [[TMP7]], i32 [[TMP9]], i32 [[TMP12]]) +// CHECK4-NEXT: [[TMP12:%.*]] = load i32*, i32** [[BLOCK_CAPTURE_ADDR5]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK4-NEXT: store i32 [[TMP13]], i32* [[TMP11]], align 4 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.SS* [[THIS:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) #[[ATTR2]] { +// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32*, align 4 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca i32*, align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[C]], i32* [[C_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK4-NEXT: store i32* [[A_ADDR]], i32** [[TMP]], align 4 -// CHECK4-NEXT: store i32* [[C_ADDR]], i32** [[_TMP1]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK4-NEXT: store i32 [[INC]], i32* [[TMP1]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP3]], -1 -// CHECK4-NEXT: store i32 [[DEC]], i32* [[B_ADDR]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32*, i32** [[_TMP1]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP5]], 1 -// CHECK4-NEXT: store i32 [[DIV]], i32* [[TMP4]], align 4 +// CHECK4-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK4-NEXT: store i32 [[TMP4]], i32* [[A]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], i32* [[B]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK4-NEXT: store i32 [[TMP8]], i32* [[C]], align 4 +// CHECK4-NEXT: store i32* [[A]], i32** [[TMP]], align 4 +// CHECK4-NEXT: store i32* [[C]], i32** [[_TMP1]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32*, i32** [[TMP]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK4-NEXT: store i32 [[INC]], i32* [[TMP9]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[B]], align 4 +// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP11]], -1 +// CHECK4-NEXT: store i32 [[DEC]], i32* [[B]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32*, i32** [[_TMP1]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP13]], 1 +// CHECK4-NEXT: store i32 [[DIV]], i32* [[TMP12]], align 4 // CHECK4-NEXT: ret void // // @@ -1484,10 +1570,9 @@ // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 4 -// CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR_CASTED2:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK9-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK9-NEXT: call void @_ZN2SSC1ERi(%struct.SS* nonnull align 8 dereferenceable(32) [[SS]], i32* nonnull align 4 dereferenceable(4) @_ZZ4mainE5sivar) // CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[TEST]]) @@ -1499,37 +1584,40 @@ // CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYINIT_BEGIN]], i64 1 // CHECK9-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float 2.000000e+00) // CHECK9-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* nonnull align 4 dereferenceable(4) [[VAR]], float 3.000000e+00) -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[VEC]], i64 [[TMP2]], [2 x %struct.S]* [[S_ARR]], %struct.S* [[VAR]], i64 [[TMP4]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: store i32 0, i32* [[A]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[T_VAR_CASTED2]] to i32* -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[T_VAR_CASTED2]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP6]]) +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 4 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]]) // CHECK9-NEXT: [[CALL:%.*]] = call i32 @_Z5tmainIiET_v() // CHECK9-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4:[0-9]+]] // CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP10]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done4: +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done2: // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK9-NEXT: ret i32 [[TMP8]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK9-NEXT: ret i32 [[TMP11]] // // // CHECK9-LABEL: define {{[^@]+}}@_ZN2SSC1ERi @@ -1569,73 +1657,75 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [2 x i32]* nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 [[T_VAR:%.*]], [2 x %struct.S]* nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 [[SIVAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK9-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK9-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK9-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 8 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: [[TMP12:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S]* [[TMP6]] to %struct.S* +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK9-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* [[AGG_TMP]]) // CHECK9-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done4: -// CHECK9-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK9-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S* nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* [[AGG_TMP6]]) -// CHECK9-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 0 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i64 0, i64 0 -// CHECK9-NEXT: [[TMP8:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* -// CHECK9-NEXT: [[TMP9:%.*]] = bitcast %struct.S* [[VAR5]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 4, i1 false) -// CHECK9-NEXT: store i32 2, i32* [[CONV1]], align 4 -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done1: +// CHECK9-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK9-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* nonnull align 4 dereferenceable(4) [[TMP8]], %struct.St* [[AGG_TMP2]]) +// CHECK9-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 0 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP16:%.*]] = bitcast %struct.S* [[ARRAYIDX3]] to i8* +// CHECK9-NEXT: [[TMP17:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP16]], i8* align 4 [[TMP17]], i64 4, i1 false) +// CHECK9-NEXT: store i32 2, i32* [[SIVAR]], align 4 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN4]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP10]], [[OMP_ARRAYCPY_DONE4]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP18]], [[OMP_ARRAYCPY_DONE1]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done9: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done5: // CHECK9-NEXT: ret void // // @@ -1683,24 +1773,28 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: [[DOTT_VAR__VOID_ADDR:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP1]], i64 4, i8* inttoptr (i64 1 to i8*)) +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: [[DOTT_VAR__VOID_ADDR:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP4]], i64 4, i8* inttoptr (i64 1 to i8*)) // CHECK9-NEXT: [[DOTT_VAR__ADDR:%.*]] = bitcast i8* [[DOTT_VAR__VOID_ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTT_VAR__ADDR]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTT_VAR__ADDR]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTT_VAR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTT_VAR__ADDR]] to i8* -// CHECK9-NEXT: call void @__kmpc_free(i32 [[TMP1]], i8* [[TMP3]], i8* inttoptr (i64 1 to i8*)) +// CHECK9-NEXT: [[TMP6:%.*]] = bitcast i32* [[DOTT_VAR__ADDR]] to i8* +// CHECK9-NEXT: call void @__kmpc_free(i32 [[TMP4]], i8* [[TMP6]], i8* inttoptr (i64 1 to i8*)) // CHECK9-NEXT: ret void // // @@ -1708,39 +1802,51 @@ // CHECK9-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK9-NEXT: [[SST:%.*]] = alloca [[STRUCT_SST:%.*]], align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 128 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 128 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2]], align 128 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.2* nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: call void @_ZN3SSTIiEC1Ev(%struct.SST* nonnull align 4 dereferenceable(4) [[SST]]) // CHECK9-NEXT: store i32 0, i32* [[T_VAR]], align 128 // CHECK9-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP0]], i8* align 128 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) -// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 1) -// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 2) -// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* nonnull align 4 dereferenceable(4) [[VAR]], i32 3) -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), [2 x i32]* [[VEC]], i32* [[T_VAR]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[VAR]]) -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* [[T_VAR]]) +// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.2* nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 1) +// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.2* nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 2) +// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.2* nonnull align 4 dereferenceable(4) [[VAR]], i32 3) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [2 x %struct.S.2]* [[S_ARR]], [2 x %struct.S.2]** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store %struct.S.2* [[VAR]], %struct.S.2** [[TMP4]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[TMP5]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_1]]) // CHECK9-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done1: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK9-NEXT: ret i32 [[TMP2]] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done2: +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK9-NEXT: ret i32 [[TMP7]] // // // CHECK9-LABEL: define {{[^@]+}}@_ZN2SSC2ERi @@ -1752,9 +1858,7 @@ // CHECK9-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[C7:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[E:%.*]] = alloca [4 x i32]*, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32* [[D]], i32** [[D_ADDR]], align 8 // CHECK9-NEXT: [[THIS1:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 @@ -1781,73 +1885,78 @@ // CHECK9-NEXT: store i32* [[TMP1]], i32** [[C7]], align 8 // CHECK9-NEXT: [[E9:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 3 // CHECK9-NEXT: store [4 x i32]* [[E9]], [4 x i32]** [[E]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A2]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[B4]], align 4 -// CHECK9-NEXT: [[CONV10:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[CONV10]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[C7]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK9-NEXT: [[CONV11:%.*]] = bitcast i64* [[C_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP8]], i32* [[CONV11]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[C_CASTED]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load [4 x i32]*, [4 x i32]** [[E]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i64, i64, i64, [4 x i32]*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.SS* [[THIS1]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP9]], [4 x i32]* [[TMP10]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store %struct.SS* [[THIS1]], %struct.SS** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A2]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[B4]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[C7]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load [4 x i32]*, [4 x i32]** [[E]], align 8 +// CHECK9-NEXT: store [4 x i32]* [[TMP12]], [4 x i32]** [[TMP11]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.SS* [[THIS:%.*]], i64 [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], [4 x i32]* nonnull align 4 dereferenceable(16) [[E:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[E_ADDR:%.*]] = alloca [4 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[_TMP4:%.*]] = alloca [4 x i32]*, align 8 -// CHECK9-NEXT: [[E5:%.*]] = alloca [4 x i32], align 16 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca [4 x i32]*, align 8 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[_TMP2:%.*]] = alloca [4 x i32]*, align 8 +// CHECK9-NEXT: [[E:%.*]] = alloca [4 x i32], align 16 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca [4 x i32]*, align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[C]], i64* [[C_ADDR]], align 8 -// CHECK9-NEXT: store [4 x i32]* [[E]], [4 x i32]** [[E_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[C_ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load [4 x i32]*, [4 x i32]** [[E_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP]], align 8 -// CHECK9-NEXT: store i32* [[CONV2]], i32** [[_TMP3]], align 8 -// CHECK9-NEXT: store [4 x i32]* [[TMP1]], [4 x i32]** [[_TMP4]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load [4 x i32]*, [4 x i32]** [[_TMP4]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = bitcast [4 x i32]* [[E5]] to i8* -// CHECK9-NEXT: [[TMP4:%.*]] = bitcast [4 x i32]* [[TMP2]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 4 [[TMP4]], i64 16, i1 false) -// CHECK9-NEXT: store [4 x i32]* [[E5]], [4 x i32]** [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[A]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 // CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 -// CHECK9-NEXT: store i32 [[INC]], i32* [[TMP5]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP7]], -1 -// CHECK9-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP9]], 1 -// CHECK9-NEXT: store i32 [[DIV]], i32* [[TMP8]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load [4 x i32]*, [4 x i32]** [[_TMP6]], align 8 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* [[TMP10]], i64 0, i64 2 +// CHECK9-NEXT: store i32 [[TMP6]], i32* [[B]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], i32* [[C]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load [4 x i32]*, [4 x i32]** [[TMP9]], align 8 +// CHECK9-NEXT: store i32* [[A]], i32** [[TMP]], align 8 +// CHECK9-NEXT: store i32* [[C]], i32** [[_TMP1]], align 8 +// CHECK9-NEXT: store [4 x i32]* [[TMP10]], [4 x i32]** [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load [4 x i32]*, [4 x i32]** [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = bitcast [4 x i32]* [[E]] to i8* +// CHECK9-NEXT: [[TMP13:%.*]] = bitcast [4 x i32]* [[TMP11]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP12]], i8* align 4 [[TMP13]], i64 16, i1 false) +// CHECK9-NEXT: store [4 x i32]* [[E]], [4 x i32]** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: store i32 [[INC]], i32* [[TMP14]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[B]], align 4 +// CHECK9-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP16]], -1 +// CHECK9-NEXT: store i32 [[DEC]], i32* [[B]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32*, i32** [[_TMP1]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP18]], 1 +// CHECK9-NEXT: store i32 [[DIV]], i32* [[TMP17]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load [4 x i32]*, [4 x i32]** [[_TMP3]], align 8 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* [[TMP19]], i64 0, i64 2 // CHECK9-NEXT: store i32 1111, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: ret void // @@ -1934,12 +2043,12 @@ // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK9-SAME: (%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.2* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK9-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.2* nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK9-NEXT: ret void // // @@ -1954,134 +2063,135 @@ // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK9-SAME: (%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.2* nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK9-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS1]], i32 [[TMP0]]) +// CHECK9-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.2* nonnull align 4 dereferenceable(4) [[THIS1]], i32 [[TMP0]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [2 x i32]* nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S.0]* nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 -// CHECK9-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 128 -// CHECK9-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 128 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 128 // CHECK9-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 -// CHECK9-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 128 +// CHECK9-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP1]], align 128 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[T_VAR1]], align 128 -// CHECK9-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK9-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP5]], i8* align 128 [[TMP6]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S.0]* [[TMP2]] to %struct.S.0* -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.2]*, [2 x %struct.S.2]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP4]], align 128 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[T_VAR]], align 128 +// CHECK9-NEXT: [[TMP10:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP10]], i8* align 128 [[TMP11]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP12:%.*]] = bitcast [2 x %struct.S.2]* [[TMP6]] to %struct.S.2* +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.2* [[ARRAY_BEGIN]], [[TMP13]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP12]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.2* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP]]) -// CHECK9-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* [[AGG_TMP]]) +// CHECK9-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.2* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.2* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* [[AGG_TMP]]) // CHECK9-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done4: -// CHECK9-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK9-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S.0* nonnull align 4 dereferenceable(4) [[TMP3]], %struct.St* [[AGG_TMP6]]) -// CHECK9-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR1]], align 128 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 0 -// CHECK9-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX]], align 128 -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i64 0, i64 0 -// CHECK9-NEXT: [[TMP10:%.*]] = bitcast %struct.S.0* [[ARRAYIDX7]] to i8* -// CHECK9-NEXT: [[TMP11:%.*]] = bitcast %struct.S.0* [[VAR5]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP10]], i8* align 128 [[TMP11]], i64 4, i1 false) -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.2* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP13]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done1: +// CHECK9-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK9-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.2* nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.2* nonnull align 4 dereferenceable(4) [[TMP8]], %struct.St* [[AGG_TMP2]]) +// CHECK9-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR]], align 128 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 0 +// CHECK9-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 128 +// CHECK9-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP15:%.*]] = bitcast %struct.S.2* [[ARRAYIDX3]] to i8* +// CHECK9-NEXT: [[TMP16:%.*]] = bitcast %struct.S.2* [[VAR]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP15]], i8* align 128 [[TMP16]], i64 4, i1 false) +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN4]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP12]], [[OMP_ARRAYCPY_DONE4]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done9: +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP17]], [[OMP_ARRAYCPY_DONE1]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done5: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC1ERKS0_2St -// CHECK9-SAME: (%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.2* nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.2* nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC2ERKS0_2St(%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.0* nonnull align 4 dereferenceable(4) [[TMP0]], %struct.St* [[T]]) +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK9-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK9-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.2* [[S]], %struct.S.2** [[S_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.S.2*, %struct.S.2** [[S_ADDR]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC2ERKS0_2St(%struct.S.2* nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.2* nonnull align 4 dereferenceable(4) [[TMP0]], %struct.St* [[T]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK9-SAME: (%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.2* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK9-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiED2Ev(%struct.S.2* nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 128 -// CHECK9-NEXT: store i32 [[TMP1]], i32* [[T_VAR1]], align 128 +// CHECK9-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 128 +// CHECK9-NEXT: store i32 [[TMP3]], i32* [[T_VAR]], align 128 // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK9-SAME: (%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.2* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK9-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], %struct.S.2* [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load volatile i32, i32* @g, align 128 // CHECK9-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK9-NEXT: ret void @@ -2092,53 +2202,57 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8 // CHECK9-NEXT: [[A2:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK9-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[THIS1:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], %struct.SST* [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: store i32 0, i32* [[A]], align 4 // CHECK9-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SST]], %struct.SST* [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: store i32* [[A3]], i32** [[A2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A2]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SST*, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.SST* [[THIS1]], i64 [[TMP2]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store %struct.SST* [[THIS1]], %struct.SST** [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], i32* [[TMP1]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.SST* [[THIS:%.*]], i64 [[A:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK9-NEXT: store i32 [[INC]], i32* [[TMP1]], align 4 +// CHECK9-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load %struct.SST*, %struct.SST** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[A]], align 4 +// CHECK9-NEXT: store i32* [[A]], i32** [[TMP]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK9-NEXT: store i32 [[INC]], i32* [[TMP5]], align 4 // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK9-SAME: (%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.2* nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], %struct.S.2* [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load volatile i32, i32* @g, align 128 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -2147,16 +2261,16 @@ // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC2ERKS0_2St -// CHECK9-SAME: (%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.2* nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.2* nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 -// CHECK9-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK9-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK9-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.2* [[S]], %struct.S.2** [[S_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], %struct.S.2* [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.S.2*, %struct.S.2** [[S_ADDR]], align 8 +// CHECK9-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[TMP0]], i32 0, i32 0 // CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[F2]], align 4 // CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.St* [[T]], i32 0, i32 0 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 @@ -2166,11 +2280,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK9-SAME: (%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.2* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK9-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -2210,9 +2324,7 @@ // CHECK11-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[C7:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[E:%.*]] = alloca [4 x i32]*, align 8 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK11-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK11-NEXT: store i32* [[D]], i32** [[D_ADDR]], align 8 // CHECK11-NEXT: [[THIS1:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 @@ -2239,71 +2351,76 @@ // CHECK11-NEXT: store i32* [[TMP1]], i32** [[C7]], align 8 // CHECK11-NEXT: [[E9:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 3 // CHECK11-NEXT: store [4 x i32]* [[E9]], [4 x i32]** [[E]], align 8 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A2]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[CONV]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[B4]], align 4 -// CHECK11-NEXT: [[CONV10:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[CONV10]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[C7]], align 8 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK11-NEXT: [[CONV11:%.*]] = bitcast i64* [[C_CASTED]] to i32* -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[CONV11]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i64, i64* [[C_CASTED]], align 8 -// CHECK11-NEXT: [[TMP10:%.*]] = load [4 x i32]*, [4 x i32]** [[E]], align 8 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i64, i64, i64, [4 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[THIS1]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP9]], [4 x i32]* [[TMP10]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store %struct.SS* [[THIS1]], %struct.SS** [[TMP2]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A2]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[TMP3]], align 8 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[B4]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32*, i32** [[C7]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[TMP8]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load [4 x i32]*, [4 x i32]** [[E]], align 8 +// CHECK11-NEXT: store [4 x i32]* [[TMP12]], [4 x i32]** [[TMP11]], align 8 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.SS* [[THIS:%.*]], i64 [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], [4 x i32]* nonnull align 4 dereferenceable(16) [[E:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[E_ADDR:%.*]] = alloca [4 x i32]*, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[_TMP4:%.*]] = alloca [4 x i32]*, align 8 -// CHECK11-NEXT: [[E5:%.*]] = alloca [4 x i32], align 16 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca [4 x i32]*, align 8 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca [4 x i32]*, align 8 +// CHECK11-NEXT: [[E:%.*]] = alloca [4 x i32], align 16 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca [4 x i32]*, align 8 // CHECK11-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK11-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK11-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK11-NEXT: store i64 [[C]], i64* [[C_ADDR]], align 8 -// CHECK11-NEXT: store [4 x i32]* [[E]], [4 x i32]** [[E_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK11-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK11-NEXT: [[CONV2:%.*]] = bitcast i64* [[C_ADDR]] to i32* -// CHECK11-NEXT: [[TMP1:%.*]] = load [4 x i32]*, [4 x i32]** [[E_ADDR]], align 8 -// CHECK11-NEXT: store i32* [[CONV]], i32** [[TMP]], align 8 -// CHECK11-NEXT: store i32* [[CONV2]], i32** [[_TMP3]], align 8 -// CHECK11-NEXT: store [4 x i32]* [[TMP1]], [4 x i32]** [[_TMP4]], align 8 -// CHECK11-NEXT: [[TMP2:%.*]] = load [4 x i32]*, [4 x i32]** [[_TMP4]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = bitcast [4 x i32]* [[E5]] to i8* -// CHECK11-NEXT: [[TMP4:%.*]] = bitcast [4 x i32]* [[TMP2]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 4 [[TMP4]], i64 16, i1 false) -// CHECK11-NEXT: store [4 x i32]* [[E5]], [4 x i32]** [[_TMP6]], align 8 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK11-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP5]], align 8 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK11-NEXT: store i32* [[TMP7]], i32** [[TMP6]], align 8 -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK11-NEXT: store i32* [[CONV1]], i32** [[TMP8]], align 8 -// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK11-NEXT: store i32* [[TMP10]], i32** [[TMP9]], align 8 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[A]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[B]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[C]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load [4 x i32]*, [4 x i32]** [[TMP9]], align 8 +// CHECK11-NEXT: store i32* [[A]], i32** [[TMP]], align 8 +// CHECK11-NEXT: store i32* [[C]], i32** [[_TMP1]], align 8 +// CHECK11-NEXT: store [4 x i32]* [[TMP10]], [4 x i32]** [[_TMP2]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = load [4 x i32]*, [4 x i32]** [[_TMP2]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = bitcast [4 x i32]* [[E]] to i8* +// CHECK11-NEXT: [[TMP13:%.*]] = bitcast [4 x i32]* [[TMP11]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP12]], i8* align 4 [[TMP13]], i64 16, i1 false) +// CHECK11-NEXT: store [4 x i32]* [[E]], [4 x i32]** [[_TMP3]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK11-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP14]], align 8 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK11-NEXT: store i32* [[TMP16]], i32** [[TMP15]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[B]], i32** [[TMP17]], align 8 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[_TMP1]], align 8 +// CHECK11-NEXT: store i32* [[TMP19]], i32** [[TMP18]], align 8 // CHECK11-NEXT: call void @_ZZN2SSC1ERiENKUlvE_clEv(%class.anon.0* nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK11-NEXT: ret void // @@ -2312,9 +2429,7 @@ // CHECK11-SAME: (%class.anon.0* nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR2:[0-9]+]] align 2 { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon.0*, align 8 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK11-NEXT: store %class.anon.0* [[THIS]], %class.anon.0** [[THIS_ADDR]], align 8 // CHECK11-NEXT: [[THIS1:%.*]] = load %class.anon.0*, %class.anon.0** [[THIS_ADDR]], align 8 // CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], %class.anon.0* [[THIS1]], i32 0, i32 0 @@ -2334,89 +2449,96 @@ // CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1 // CHECK11-NEXT: store i32 [[DIV]], i32* [[TMP9]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 1 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP11]], align 8 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK11-NEXT: store i32 [[TMP13]], i32* [[CONV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32*, i32** [[TMP15]], align 8 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 -// CHECK11-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK11-NEXT: store i32 [[TMP17]], i32* [[CONV2]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 3 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP19]], align 8 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK11-NEXT: [[CONV3:%.*]] = bitcast i64* [[C_CASTED]] to i32* -// CHECK11-NEXT: store i32 [[TMP21]], i32* [[CONV3]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i64, i64* [[C_CASTED]], align 8 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.SS* [[TMP1]], i64 [[TMP14]], i64 [[TMP18]], i64 [[TMP22]]) +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store %struct.SS* [[TMP1]], %struct.SS** [[TMP11]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[TMP13]], align 8 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[TMP12]], align 8 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[TMP16]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32*, i32** [[TMP21]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[TMP20]], align 8 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.SS* [[THIS:%.*]], i64 [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32*, align 8 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK11-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK11-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK11-NEXT: store i64 [[C]], i64* [[C_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK11-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK11-NEXT: [[CONV2:%.*]] = bitcast i64* [[C_ADDR]] to i32* -// CHECK11-NEXT: store i32* [[CONV]], i32** [[TMP]], align 8 -// CHECK11-NEXT: store i32* [[CONV2]], i32** [[_TMP3]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK11-NEXT: store i32 [[INC]], i32* [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK11-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP3]], -1 -// CHECK11-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP5]], 1 -// CHECK11-NEXT: store i32 [[DIV]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[A]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[B]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[C]], align 4 +// CHECK11-NEXT: store i32* [[A]], i32** [[TMP]], align 8 +// CHECK11-NEXT: store i32* [[C]], i32** [[_TMP1]], align 8 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: store i32 [[INC]], i32* [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[B]], align 4 +// CHECK11-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP11]], -1 +// CHECK11-NEXT: store i32 [[DEC]], i32* [[B]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[_TMP1]], align 8 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP13]], 1 +// CHECK11-NEXT: store i32 [[DIV]], i32* [[TMP12]], align 4 // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[G:%.*]], i64 [[SIVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[G1:%.*]] = alloca i32, align 128 -// CHECK11-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK11-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[G:%.*]] = alloca i32, align 128 +// CHECK11-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_4:%.*]], align 8 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 -// CHECK11-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[G_ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK11-NEXT: [[TMP1:%.*]] = load volatile i32, i32* [[TMP0]], align 128 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[G1]], align 128 -// CHECK11-NEXT: store i32 1, i32* [[G1]], align 128 -// CHECK11-NEXT: store i32 2, i32* [[CONV]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 -// CHECK11-NEXT: store i32* [[G1]], i32** [[TMP2]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 -// CHECK11-NEXT: store i32* [[CONV]], i32** [[TMP3]], align 8 -// CHECK11-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* nonnull align 8 dereferenceable(16) [[REF_TMP]]) +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[SIVAR]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load volatile i32, i32* [[TMP2]], align 128 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[G]], align 128 +// CHECK11-NEXT: store i32 1, i32* [[G]], align 128 +// CHECK11-NEXT: store i32 2, i32* [[SIVAR]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[G]], i32** [[TMP6]], align 8 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[SIVAR]], i32** [[TMP7]], align 8 +// CHECK11-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.4* nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK11-NEXT: ret void // // @@ -2469,37 +2591,41 @@ // CHECK12-NEXT: entry: // CHECK12-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 // CHECK12-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>*, align 8 -// CHECK12-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK12-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK12-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK12-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* // CHECK12-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>** [[BLOCK_ADDR]], align 8 -// CHECK12-NEXT: [[TMP0:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4 -// CHECK12-NEXT: [[CONV:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK12-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* @g, i64 [[TMP1]]) +// CHECK12-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK12-NEXT: store i32* @g, i32** [[TMP0]], align 8 +// CHECK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4 +// CHECK12-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK12-NEXT: ret void // // // CHECK12-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[G:%.*]], i64 [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK12-NEXT: entry: // CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK12-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 -// CHECK12-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK12-NEXT: [[G1:%.*]] = alloca i32, align 128 +// CHECK12-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK12-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[G:%.*]] = alloca i32, align 128 // CHECK12-NEXT: [[BLOCK:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>, align 128 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK12-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 -// CHECK12-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK12-NEXT: [[TMP0:%.*]] = load i32*, i32** [[G_ADDR]], align 8 -// CHECK12-NEXT: [[CONV:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK12-NEXT: [[TMP1:%.*]] = load volatile i32, i32* [[TMP0]], align 128 -// CHECK12-NEXT: store i32 [[TMP1]], i32* [[G1]], align 128 -// CHECK12-NEXT: store i32 1, i32* [[G1]], align 128 -// CHECK12-NEXT: store i32 2, i32* [[CONV]], align 4 +// CHECK12-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK12-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK12-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK12-NEXT: store i32 [[TMP4]], i32* [[SIVAR]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load volatile i32, i32* [[TMP2]], align 128 +// CHECK12-NEXT: store i32 [[TMP5]], i32* [[G]], align 128 +// CHECK12-NEXT: store i32 1, i32* [[G]], align 128 +// CHECK12-NEXT: store i32 2, i32* [[SIVAR]], align 4 // CHECK12-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>* [[BLOCK]], i32 0, i32 0 // CHECK12-NEXT: store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** [[BLOCK_ISA]], align 128 // CHECK12-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>* [[BLOCK]], i32 0, i32 1 @@ -2511,18 +2637,18 @@ // CHECK12-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>* [[BLOCK]], i32 0, i32 4 // CHECK12-NEXT: store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** [[BLOCK_DESCRIPTOR]], align 8 // CHECK12-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>* [[BLOCK]], i32 0, i32 7 -// CHECK12-NEXT: [[TMP2:%.*]] = load volatile i32, i32* [[G1]], align 128 -// CHECK12-NEXT: store volatile i32 [[TMP2]], i32* [[BLOCK_CAPTURED]], align 128 -// CHECK12-NEXT: [[BLOCK_CAPTURED2:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>* [[BLOCK]], i32 0, i32 5 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK12-NEXT: store i32 [[TMP3]], i32* [[BLOCK_CAPTURED2]], align 32 -// CHECK12-NEXT: [[TMP4:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>* [[BLOCK]] to void ()* -// CHECK12-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP4]] to %struct.__block_literal_generic* -// CHECK12-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 -// CHECK12-NEXT: [[TMP6:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* -// CHECK12-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP5]], align 8 -// CHECK12-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to void (i8*)* -// CHECK12-NEXT: call void [[TMP8]](i8* [[TMP6]]) +// CHECK12-NEXT: [[TMP6:%.*]] = load volatile i32, i32* [[G]], align 128 +// CHECK12-NEXT: store volatile i32 [[TMP6]], i32* [[BLOCK_CAPTURED]], align 128 +// CHECK12-NEXT: [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>* [[BLOCK]], i32 0, i32 5 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK12-NEXT: store i32 [[TMP7]], i32* [[BLOCK_CAPTURED1]], align 32 +// CHECK12-NEXT: [[TMP8:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>* [[BLOCK]] to void ()* +// CHECK12-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP8]] to %struct.__block_literal_generic* +// CHECK12-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 +// CHECK12-NEXT: [[TMP10:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* +// CHECK12-NEXT: [[TMP11:%.*]] = load i8*, i8** [[TMP9]], align 8 +// CHECK12-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP11]] to void (i8*)* +// CHECK12-NEXT: call void [[TMP12]](i8* [[TMP10]]) // CHECK12-NEXT: ret void // // @@ -2550,9 +2676,7 @@ // CHECK12-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[C7:%.*]] = alloca i32*, align 8 // CHECK12-NEXT: [[E:%.*]] = alloca [4 x i32]*, align 8 -// CHECK12-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK12-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK12-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK12-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK12-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK12-NEXT: store i32* [[D]], i32** [[D_ADDR]], align 8 // CHECK12-NEXT: [[THIS1:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 @@ -2579,61 +2703,66 @@ // CHECK12-NEXT: store i32* [[TMP1]], i32** [[C7]], align 8 // CHECK12-NEXT: [[E9:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 3 // CHECK12-NEXT: store [4 x i32]* [[E9]], [4 x i32]** [[E]], align 8 -// CHECK12-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A2]], align 8 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK12-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK12-NEXT: store i32 [[TMP3]], i32* [[CONV]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[B4]], align 4 -// CHECK12-NEXT: [[CONV10:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK12-NEXT: store i32 [[TMP5]], i32* [[CONV10]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32*, i32** [[C7]], align 8 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK12-NEXT: [[CONV11:%.*]] = bitcast i64* [[C_CASTED]] to i32* -// CHECK12-NEXT: store i32 [[TMP8]], i32* [[CONV11]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i64, i64* [[C_CASTED]], align 8 -// CHECK12-NEXT: [[TMP10:%.*]] = load [4 x i32]*, [4 x i32]** [[E]], align 8 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i64, i64, i64, [4 x i32]*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.SS* [[THIS1]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP9]], [4 x i32]* [[TMP10]]) +// CHECK12-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK12-NEXT: store %struct.SS* [[THIS1]], %struct.SS** [[TMP2]], align 8 +// CHECK12-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A2]], align 8 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK12-NEXT: store i32 [[TMP5]], i32* [[TMP3]], align 8 +// CHECK12-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[B4]], align 4 +// CHECK12-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32*, i32** [[C7]], align 8 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK12-NEXT: store i32 [[TMP10]], i32* [[TMP8]], align 8 +// CHECK12-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load [4 x i32]*, [4 x i32]** [[E]], align 8 +// CHECK12-NEXT: store [4 x i32]* [[TMP12]], [4 x i32]** [[TMP11]], align 8 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK12-NEXT: ret void // // // CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.SS* [[THIS:%.*]], i64 [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], [4 x i32]* nonnull align 4 dereferenceable(16) [[E:%.*]]) #[[ATTR2]] { +// CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK12-NEXT: entry: // CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK12-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 -// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK12-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK12-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 -// CHECK12-NEXT: [[E_ADDR:%.*]] = alloca [4 x i32]*, align 8 +// CHECK12-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK12-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[TMP:%.*]] = alloca i32*, align 8 -// CHECK12-NEXT: [[_TMP3:%.*]] = alloca i32*, align 8 -// CHECK12-NEXT: [[_TMP4:%.*]] = alloca [4 x i32]*, align 8 -// CHECK12-NEXT: [[E5:%.*]] = alloca [4 x i32], align 16 -// CHECK12-NEXT: [[_TMP6:%.*]] = alloca [4 x i32]*, align 8 +// CHECK12-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 +// CHECK12-NEXT: [[_TMP2:%.*]] = alloca [4 x i32]*, align 8 +// CHECK12-NEXT: [[E:%.*]] = alloca [4 x i32], align 16 +// CHECK12-NEXT: [[_TMP3:%.*]] = alloca [4 x i32]*, align 8 // CHECK12-NEXT: [[BLOCK:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, align 8 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK12-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK12-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK12-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK12-NEXT: store i64 [[C]], i64* [[C_ADDR]], align 8 -// CHECK12-NEXT: store [4 x i32]* [[E]], [4 x i32]** [[E_ADDR]], align 8 -// CHECK12-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK12-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK12-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK12-NEXT: [[CONV2:%.*]] = bitcast i64* [[C_ADDR]] to i32* -// CHECK12-NEXT: [[TMP1:%.*]] = load [4 x i32]*, [4 x i32]** [[E_ADDR]], align 8 -// CHECK12-NEXT: store i32* [[CONV]], i32** [[TMP]], align 8 -// CHECK12-NEXT: store i32* [[CONV2]], i32** [[_TMP3]], align 8 -// CHECK12-NEXT: store [4 x i32]* [[TMP1]], [4 x i32]** [[_TMP4]], align 8 -// CHECK12-NEXT: [[TMP2:%.*]] = load [4 x i32]*, [4 x i32]** [[_TMP4]], align 8 -// CHECK12-NEXT: [[TMP3:%.*]] = bitcast [4 x i32]* [[E5]] to i8* -// CHECK12-NEXT: [[TMP4:%.*]] = bitcast [4 x i32]* [[TMP2]] to i8* -// CHECK12-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 4 [[TMP4]], i64 16, i1 false) -// CHECK12-NEXT: store [4 x i32]* [[E5]], [4 x i32]** [[_TMP6]], align 8 +// CHECK12-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK12-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 +// CHECK12-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK12-NEXT: store i32 [[TMP4]], i32* [[A]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK12-NEXT: store i32 [[TMP6]], i32* [[B]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK12-NEXT: store i32 [[TMP8]], i32* [[C]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load [4 x i32]*, [4 x i32]** [[TMP9]], align 8 +// CHECK12-NEXT: store i32* [[A]], i32** [[TMP]], align 8 +// CHECK12-NEXT: store i32* [[C]], i32** [[_TMP1]], align 8 +// CHECK12-NEXT: store [4 x i32]* [[TMP10]], [4 x i32]** [[_TMP2]], align 8 +// CHECK12-NEXT: [[TMP11:%.*]] = load [4 x i32]*, [4 x i32]** [[_TMP2]], align 8 +// CHECK12-NEXT: [[TMP12:%.*]] = bitcast [4 x i32]* [[E]] to i8* +// CHECK12-NEXT: [[TMP13:%.*]] = bitcast [4 x i32]* [[TMP11]] to i8* +// CHECK12-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP12]], i8* align 4 [[TMP13]], i64 16, i1 false) +// CHECK12-NEXT: store [4 x i32]* [[E]], [4 x i32]** [[_TMP3]], align 8 // CHECK12-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 0 // CHECK12-NEXT: store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** [[BLOCK_ISA]], align 8 // CHECK12-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 1 @@ -2645,23 +2774,23 @@ // CHECK12-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 4 // CHECK12-NEXT: store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp.4 to %struct.__block_descriptor*), %struct.__block_descriptor** [[BLOCK_DESCRIPTOR]], align 8 // CHECK12-NEXT: [[BLOCK_CAPTURED_THIS_ADDR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 5 -// CHECK12-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[BLOCK_CAPTURED_THIS_ADDR]], align 8 +// CHECK12-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[BLOCK_CAPTURED_THIS_ADDR]], align 8 // CHECK12-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 6 -// CHECK12-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK12-NEXT: store i32* [[TMP5]], i32** [[BLOCK_CAPTURED]], align 8 -// CHECK12-NEXT: [[BLOCK_CAPTURED7:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 8 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK12-NEXT: store i32 [[TMP6]], i32* [[BLOCK_CAPTURED7]], align 8 -// CHECK12-NEXT: [[BLOCK_CAPTURED8:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 7 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK12-NEXT: store i32* [[TMP7]], i32** [[BLOCK_CAPTURED8]], align 8 -// CHECK12-NEXT: [[TMP8:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]] to void ()* -// CHECK12-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP8]] to %struct.__block_literal_generic* -// CHECK12-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 -// CHECK12-NEXT: [[TMP10:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* -// CHECK12-NEXT: [[TMP11:%.*]] = load i8*, i8** [[TMP9]], align 8 -// CHECK12-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP11]] to void (i8*)* -// CHECK12-NEXT: call void [[TMP12]](i8* [[TMP10]]) +// CHECK12-NEXT: [[TMP14:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK12-NEXT: store i32* [[TMP14]], i32** [[BLOCK_CAPTURED]], align 8 +// CHECK12-NEXT: [[BLOCK_CAPTURED4:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 8 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[B]], align 4 +// CHECK12-NEXT: store i32 [[TMP15]], i32* [[BLOCK_CAPTURED4]], align 8 +// CHECK12-NEXT: [[BLOCK_CAPTURED5:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 7 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32*, i32** [[_TMP1]], align 8 +// CHECK12-NEXT: store i32* [[TMP16]], i32** [[BLOCK_CAPTURED5]], align 8 +// CHECK12-NEXT: [[TMP17:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]] to void ()* +// CHECK12-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP17]] to %struct.__block_literal_generic* +// CHECK12-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 +// CHECK12-NEXT: [[TMP19:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* +// CHECK12-NEXT: [[TMP20:%.*]] = load i8*, i8** [[TMP18]], align 8 +// CHECK12-NEXT: [[TMP21:%.*]] = bitcast i8* [[TMP20]] to void (i8*)* +// CHECK12-NEXT: call void [[TMP21]](i8* [[TMP19]]) // CHECK12-NEXT: ret void // // @@ -2670,9 +2799,7 @@ // CHECK12-NEXT: entry: // CHECK12-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 // CHECK12-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>*, align 8 -// CHECK12-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK12-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK12-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK12-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK12-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK12-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* // CHECK12-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>** [[BLOCK_ADDR]], align 8 @@ -2692,61 +2819,65 @@ // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 // CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP4]], 1 // CHECK12-NEXT: store i32 [[DIV]], i32* [[TMP3]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK12-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[TMP5]], align 8 +// CHECK12-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 // CHECK12-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 6 -// CHECK12-NEXT: [[TMP5:%.*]] = load i32*, i32** [[BLOCK_CAPTURE_ADDR3]], align 8 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK12-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK12-NEXT: store i32 [[TMP6]], i32* [[CONV]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i64, i64* [[A_CASTED]], align 8 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32*, i32** [[BLOCK_CAPTURE_ADDR3]], align 8 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK12-NEXT: store i32 [[TMP8]], i32* [[TMP6]], align 8 +// CHECK12-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK12-NEXT: [[BLOCK_CAPTURE_ADDR4:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 8 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[BLOCK_CAPTURE_ADDR4]], align 8 -// CHECK12-NEXT: [[CONV5:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK12-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK12-NEXT: [[BLOCK_CAPTURE_ADDR6:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 7 -// CHECK12-NEXT: [[TMP10:%.*]] = load i32*, i32** [[BLOCK_CAPTURE_ADDR6]], align 8 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK12-NEXT: [[CONV7:%.*]] = bitcast i64* [[C_CASTED]] to i32* -// CHECK12-NEXT: store i32 [[TMP11]], i32* [[CONV7]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i64, i64* [[C_CASTED]], align 8 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i64, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.SS* [[THIS]], i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP12]]) +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[BLOCK_CAPTURE_ADDR4]], align 8 +// CHECK12-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK12-NEXT: [[BLOCK_CAPTURE_ADDR5:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 7 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32*, i32** [[BLOCK_CAPTURE_ADDR5]], align 8 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK12-NEXT: store i32 [[TMP13]], i32* [[TMP11]], align 8 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK12-NEXT: ret void // // // CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.SS* [[THIS:%.*]], i64 [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]]) #[[ATTR2]] { +// CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK12-NEXT: entry: // CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK12-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 -// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK12-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK12-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 +// CHECK12-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK12-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[TMP:%.*]] = alloca i32*, align 8 -// CHECK12-NEXT: [[_TMP3:%.*]] = alloca i32*, align 8 +// CHECK12-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK12-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK12-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK12-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK12-NEXT: store i64 [[C]], i64* [[C_ADDR]], align 8 -// CHECK12-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK12-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK12-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK12-NEXT: [[CONV2:%.*]] = bitcast i64* [[C_ADDR]] to i32* -// CHECK12-NEXT: store i32* [[CONV]], i32** [[TMP]], align 8 -// CHECK12-NEXT: store i32* [[CONV2]], i32** [[_TMP3]], align 8 -// CHECK12-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK12-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK12-NEXT: store i32 [[INC]], i32* [[TMP1]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK12-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP3]], -1 -// CHECK12-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP5]], 1 -// CHECK12-NEXT: store i32 [[DIV]], i32* [[TMP4]], align 4 +// CHECK12-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK12-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 +// CHECK12-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK12-NEXT: store i32 [[TMP4]], i32* [[A]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK12-NEXT: store i32 [[TMP6]], i32* [[B]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK12-NEXT: store i32 [[TMP8]], i32* [[C]], align 4 +// CHECK12-NEXT: store i32* [[A]], i32** [[TMP]], align 8 +// CHECK12-NEXT: store i32* [[C]], i32** [[_TMP1]], align 8 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK12-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK12-NEXT: store i32 [[INC]], i32* [[TMP9]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[B]], align 4 +// CHECK12-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP11]], -1 +// CHECK12-NEXT: store i32 [[DEC]], i32* [[B]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32*, i32** [[_TMP1]], align 8 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP13]], 1 +// CHECK12-NEXT: store i32 [[DIV]], i32* [[TMP12]], align 4 // CHECK12-NEXT: ret void // // @@ -2760,6 +2891,7 @@ // CHECK17-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 // CHECK17-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK17-NEXT: store %struct.St* [[S]], %struct.St** [[S_ADDR]], align 8 // CHECK17-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 @@ -2776,65 +2908,76 @@ // CHECK17-NEXT: [[VLA:%.*]] = alloca double, i64 [[TMP7]], align 128 // CHECK17-NEXT: store i64 [[TMP3]], i64* [[__VLA_EXPR0]], align 8 // CHECK17-NEXT: store i64 [[TMP5]], i64* [[__VLA_EXPR1]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load %struct.St*, %struct.St** [[S_ADDR]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load x86_fp80*, x86_fp80** [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load float*, float** [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 8, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.St*, i32*, i64, x86_fp80*, float*, i64, i64, double*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.St* [[TMP8]], i32* [[N_ADDR]], i64 [[TMP1]], x86_fp80* [[TMP9]], float* [[TMP10]], i64 [[TMP3]], i64 [[TMP5]], double* [[VLA]]) -// CHECK17-NEXT: [[TMP11:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK17-NEXT: call void @llvm.stackrestore(i8* [[TMP11]]) +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP9:%.*]] = load %struct.St*, %struct.St** [[S_ADDR]], align 8 +// CHECK17-NEXT: store %struct.St* [[TMP9]], %struct.St** [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i32* [[N_ADDR]], i32** [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i64 [[TMP1]], i64* [[TMP11]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP13:%.*]] = load x86_fp80*, x86_fp80** [[VLA1_ADDR]], align 8 +// CHECK17-NEXT: store x86_fp80* [[TMP13]], x86_fp80** [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load float*, float** [[A_ADDR]], align 8 +// CHECK17-NEXT: store float* [[TMP15]], float** [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: store i64 [[TMP3]], i64* [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK17-NEXT: store i64 [[TMP5]], i64* [[TMP17]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK17-NEXT: store double* [[VLA]], double** [[TMP18]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK17-NEXT: call void @llvm.stackrestore(i8* [[TMP19]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.St* [[S:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], i64 [[VLA:%.*]], x86_fp80* [[VLA1:%.*]], float* [[A:%.*]], i64 [[VLA2:%.*]], i64 [[VLA4:%.*]], double* nonnull align 8 dereferenceable(8) [[VLA26:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK17-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[S_ADDR:%.*]] = alloca %struct.St*, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA1_ADDR:%.*]] = alloca x86_fp80*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 -// CHECK17-NEXT: [[VLA_ADDR3:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR5:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA2_ADDR:%.*]] = alloca double*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK17-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 // CHECK17-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store %struct.St* [[S]], %struct.St** [[S_ADDR]], align 8 -// CHECK17-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store x86_fp80* [[VLA1]], x86_fp80** [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA2]], i64* [[VLA_ADDR3]], align 8 -// CHECK17-NEXT: store i64 [[VLA4]], i64* [[VLA_ADDR5]], align 8 -// CHECK17-NEXT: store double* [[VLA26]], double** [[VLA2_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR3]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[VLA_ADDR5]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load double*, double** [[VLA2_ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = call i8* @llvm.stacksave() -// CHECK17-NEXT: store i8* [[TMP5]], i8** [[SAVED_STACK]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP2]], [[TMP3]] -// CHECK17-NEXT: [[VLA7:%.*]] = alloca double, i64 [[TMP6]], align 128 -// CHECK17-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8 -// CHECK17-NEXT: store i64 [[TMP3]], i64* [[__VLA_EXPR1]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP2]], [[TMP3]] -// CHECK17-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 8 -// CHECK17-NEXT: [[TMP9:%.*]] = bitcast double* [[VLA7]] to i8* -// CHECK17-NEXT: [[TMP10:%.*]] = bitcast double* [[TMP4]] to i8* -// CHECK17-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP9]], i8* align 128 [[TMP10]], i64 [[TMP8]], i1 false) -// CHECK17-NEXT: [[TMP11:%.*]] = load %struct.St*, %struct.St** [[S_ADDR]], align 8 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.St* [[TMP11]], i64 0 -// CHECK17-NEXT: [[TMP12:%.*]] = load %struct.St*, %struct.St** [[S_ADDR]], align 8 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load x86_fp80*, x86_fp80** [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: call void @_ZN2St7St_funcEPS_iPe(%struct.St* nonnull align 4 dereferenceable(8) [[ARRAYIDX]], %struct.St* [[TMP12]], i32 [[TMP13]], x86_fp80* [[TMP14]]) -// CHECK17-NEXT: [[TMP15:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK17-NEXT: call void @llvm.stackrestore(i8* [[TMP15]]) +// CHECK17-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, i64* [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 7 +// CHECK17-NEXT: [[TMP13:%.*]] = load double*, double** [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = call i8* @llvm.stacksave() +// CHECK17-NEXT: store i8* [[TMP14]], i8** [[SAVED_STACK]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP9]], [[TMP11]] +// CHECK17-NEXT: [[VLA:%.*]] = alloca double, i64 [[TMP15]], align 128 +// CHECK17-NEXT: store i64 [[TMP9]], i64* [[__VLA_EXPR0]], align 8 +// CHECK17-NEXT: store i64 [[TMP11]], i64* [[__VLA_EXPR1]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP9]], [[TMP11]] +// CHECK17-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 8 +// CHECK17-NEXT: [[TMP18:%.*]] = bitcast double* [[VLA]] to i8* +// CHECK17-NEXT: [[TMP19:%.*]] = bitcast double* [[TMP13]] to i8* +// CHECK17-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP18]], i8* align 128 [[TMP19]], i64 [[TMP17]], i1 false) +// CHECK17-NEXT: [[TMP20:%.*]] = load %struct.St*, %struct.St** [[TMP1]], align 8 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.St* [[TMP20]], i64 0 +// CHECK17-NEXT: [[TMP21:%.*]] = load %struct.St*, %struct.St** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load x86_fp80*, x86_fp80** [[TMP6]], align 8 +// CHECK17-NEXT: call void @_ZN2St7St_funcEPS_iPe(%struct.St* nonnull align 4 dereferenceable(8) [[ARRAYIDX]], %struct.St* [[TMP21]], i32 [[TMP22]], x86_fp80* [[TMP23]]) +// CHECK17-NEXT: [[TMP24:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK17-NEXT: call void @llvm.stackrestore(i8* [[TMP24]]) // CHECK17-NEXT: ret void // // @@ -2848,6 +2991,7 @@ // CHECK17-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 // CHECK17-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store %struct.St* [[THIS]], %struct.St** [[THIS_ADDR]], align 8 // CHECK17-NEXT: store %struct.St* [[S]], %struct.St** [[S_ADDR]], align 8 // CHECK17-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 @@ -2869,77 +3013,88 @@ // CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[B]], align 4 // CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST]], %struct.St* [[THIS1]], i32 0, i32 0 // CHECK17-NEXT: store i32 [[TMP8]], i32* [[A]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load x86_fp80*, x86_fp80** [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load %struct.St*, %struct.St** [[S_ADDR]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 8, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, x86_fp80*, %struct.St*, i64, i64, double*, i32*, %struct.St*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP1]], x86_fp80* [[TMP9]], %struct.St* [[THIS1]], i64 [[TMP3]], i64 [[TMP5]], double* [[VLA]], i32* [[N_ADDR]], %struct.St* [[TMP10]]) -// CHECK17-NEXT: [[TMP11:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK17-NEXT: call void @llvm.stackrestore(i8* [[TMP11]]) +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i64 [[TMP1]], i64* [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP11:%.*]] = load x86_fp80*, x86_fp80** [[VLA1_ADDR]], align 8 +// CHECK17-NEXT: store x86_fp80* [[TMP11]], x86_fp80** [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store %struct.St* [[THIS1]], %struct.St** [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP3]], i64* [[TMP13]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i64 [[TMP5]], i64* [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: store double* [[VLA]], double** [[TMP15]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK17-NEXT: store i32* [[N_ADDR]], i32** [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK17-NEXT: [[TMP18:%.*]] = load %struct.St*, %struct.St** [[S_ADDR]], align 8 +// CHECK17-NEXT: store %struct.St* [[TMP18]], %struct.St** [[TMP17]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK17-NEXT: call void @llvm.stackrestore(i8* [[TMP19]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[VLA:%.*]], x86_fp80* [[VLA1:%.*]], %struct.St* [[THIS:%.*]], i64 [[VLA2:%.*]], i64 [[VLA4:%.*]], double* nonnull align 8 dereferenceable(8) [[VLA26:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], %struct.St* [[S:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA1_ADDR:%.*]] = alloca x86_fp80*, align 8 -// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.St*, align 8 -// CHECK17-NEXT: [[VLA_ADDR3:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR5:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA2_ADDR:%.*]] = alloca double*, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[S_ADDR:%.*]] = alloca %struct.St*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store x86_fp80* [[VLA1]], x86_fp80** [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: store %struct.St* [[THIS]], %struct.St** [[THIS_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA2]], i64* [[VLA_ADDR3]], align 8 -// CHECK17-NEXT: store i64 [[VLA4]], i64* [[VLA_ADDR5]], align 8 -// CHECK17-NEXT: store double* [[VLA26]], double** [[VLA2_ADDR]], align 8 -// CHECK17-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK17-NEXT: store %struct.St* [[S]], %struct.St** [[S_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load %struct.St*, %struct.St** [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR3]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[VLA_ADDR5]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load double*, double** [[VLA2_ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP2]], [[TMP3]] -// CHECK17-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 8 -// CHECK17-NEXT: [[TMP8:%.*]] = add nuw i64 [[TMP7]], 127 -// CHECK17-NEXT: [[TMP9:%.*]] = udiv i64 [[TMP8]], 128 -// CHECK17-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 128 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK17-NEXT: [[DOTVLA2__VOID_ADDR:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP12]], i64 [[TMP10]], i8* inttoptr (i64 8 to i8*)) +// CHECK17-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP5:%.*]] = load %struct.St*, %struct.St** [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, i64* [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP11:%.*]] = load double*, double** [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 7 +// CHECK17-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP7]], [[TMP9]] +// CHECK17-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 8 +// CHECK17-NEXT: [[TMP17:%.*]] = add nuw i64 [[TMP16]], 127 +// CHECK17-NEXT: [[TMP18:%.*]] = udiv i64 [[TMP17]], 128 +// CHECK17-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 128 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK17-NEXT: [[DOTVLA2__VOID_ADDR:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP21]], i64 [[TMP19]], i8* inttoptr (i64 8 to i8*)) // CHECK17-NEXT: [[DOTVLA2__ADDR:%.*]] = bitcast i8* [[DOTVLA2__VOID_ADDR]] to double* -// CHECK17-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP2]], [[TMP3]] -// CHECK17-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 8 -// CHECK17-NEXT: [[TMP15:%.*]] = bitcast double* [[DOTVLA2__ADDR]] to i8* -// CHECK17-NEXT: [[TMP16:%.*]] = bitcast double* [[TMP4]] to i8* -// CHECK17-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP15]], i8* align 128 [[TMP16]], i64 [[TMP14]], i1 false) -// CHECK17-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.St* [[TMP1]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[B]], align 4 -// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST]], %struct.St* [[TMP1]], i32 0, i32 0 -// CHECK17-NEXT: store i32 [[TMP17]], i32* [[A]], align 4 -// CHECK17-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP17]] to double -// CHECK17-NEXT: [[TMP18:%.*]] = mul nsw i64 1, [[TMP3]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[DOTVLA2__ADDR]], i64 [[TMP18]] -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP19]], 1 +// CHECK17-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP7]], [[TMP9]] +// CHECK17-NEXT: [[TMP23:%.*]] = mul nuw i64 [[TMP22]], 8 +// CHECK17-NEXT: [[TMP24:%.*]] = bitcast double* [[DOTVLA2__ADDR]] to i8* +// CHECK17-NEXT: [[TMP25:%.*]] = bitcast double* [[TMP11]] to i8* +// CHECK17-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP24]], i8* align 128 [[TMP25]], i64 [[TMP23]], i1 false) +// CHECK17-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.St* [[TMP5]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[B]], align 4 +// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST]], %struct.St* [[TMP5]], i32 0, i32 0 +// CHECK17-NEXT: store i32 [[TMP26]], i32* [[A]], align 4 +// CHECK17-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP26]] to double +// CHECK17-NEXT: [[TMP27:%.*]] = mul nsw i64 1, [[TMP9]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[DOTVLA2__ADDR]], i64 [[TMP27]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP28]], 1 // CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[SUB]] to i64 -// CHECK17-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX]], i64 [[IDXPROM]] -// CHECK17-NEXT: store double [[CONV]], double* [[ARRAYIDX7]], align 8 -// CHECK17-NEXT: [[CONV8:%.*]] = fpext double [[CONV]] to x86_fp80 -// CHECK17-NEXT: [[TMP20:%.*]] = load x86_fp80*, x86_fp80** [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: [[B9:%.*]] = getelementptr inbounds [[STRUCT_ST]], %struct.St* [[TMP1]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[B9]], align 4 -// CHECK17-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK17-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds x86_fp80, x86_fp80* [[TMP20]], i64 [[IDXPROM10]] -// CHECK17-NEXT: store x86_fp80 [[CONV8]], x86_fp80* [[ARRAYIDX11]], align 16 -// CHECK17-NEXT: [[TMP22:%.*]] = bitcast double* [[DOTVLA2__ADDR]] to i8* -// CHECK17-NEXT: call void @__kmpc_free(i32 [[TMP12]], i8* [[TMP22]], i8* inttoptr (i64 8 to i8*)) +// CHECK17-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX]], i64 [[IDXPROM]] +// CHECK17-NEXT: store double [[CONV]], double* [[ARRAYIDX1]], align 8 +// CHECK17-NEXT: [[CONV2:%.*]] = fpext double [[CONV]] to x86_fp80 +// CHECK17-NEXT: [[TMP29:%.*]] = load x86_fp80*, x86_fp80** [[TMP3]], align 8 +// CHECK17-NEXT: [[B3:%.*]] = getelementptr inbounds [[STRUCT_ST]], %struct.St* [[TMP5]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[B3]], align 4 +// CHECK17-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK17-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds x86_fp80, x86_fp80* [[TMP29]], i64 [[IDXPROM4]] +// CHECK17-NEXT: store x86_fp80 [[CONV2]], x86_fp80* [[ARRAYIDX5]], align 16 +// CHECK17-NEXT: [[TMP31:%.*]] = bitcast double* [[DOTVLA2__ADDR]] to i8* +// CHECK17-NEXT: call void @__kmpc_free(i32 [[TMP21]], i8* [[TMP31]], i8* inttoptr (i64 8 to i8*)) // CHECK17-NEXT: ret void // diff --git a/clang/test/OpenMP/parallel_for_codegen.cpp b/clang/test/OpenMP/parallel_for_codegen.cpp --- a/clang/test/OpenMP/parallel_for_codegen.cpp +++ b/clang/test/OpenMP/parallel_for_codegen.cpp @@ -245,25 +245,25 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK1-NEXT: store double 5.000000e+00, double* [[A]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load double, double* [[A]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = fptosi double [[TMP0]] to i8 // CHECK1-NEXT: store i8 [[CONV]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* -// CHECK1-NEXT: store i8 [[TMP1]], i8* [[CONV1]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP2]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: store i8 [[TMP2]], i8* [[TMP1]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca double, align 8 @@ -274,94 +274,97 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[A:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[I5:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[I4:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK1-NEXT: [[TMP0:%.*]] = load double, double* undef, align 8 -// CHECK1-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP0]] +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK1-NEXT: store i8 [[TMP2]], i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load double, double* undef, align 8 +// CHECK1-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP3]] // CHECK1-NEXT: store double [[ADD]], double* [[DOTCAPTURE_EXPR_1]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double, double* [[DOTCAPTURE_EXPR_1]], align 8 -// CHECK1-NEXT: [[SUB:%.*]] = fsub double [[TMP1]], 1.000000e+00 +// CHECK1-NEXT: [[TMP4:%.*]] = load double, double* [[DOTCAPTURE_EXPR_1]], align 8 +// CHECK1-NEXT: [[SUB:%.*]] = fsub double [[TMP4]], 1.000000e+00 // CHECK1-NEXT: [[DIV:%.*]] = fdiv double [[SUB]], 1.000000e+00 -// CHECK1-NEXT: [[CONV3:%.*]] = fptoui double [[DIV]] to i64 -// CHECK1-NEXT: [[SUB4:%.*]] = sub i64 [[CONV3]], 1 -// CHECK1-NEXT: store i64 [[SUB4]], i64* [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = fptoui double [[DIV]] to i64 +// CHECK1-NEXT: [[SUB3:%.*]] = sub i64 [[CONV]], 1 +// CHECK1-NEXT: store i64 [[SUB3]], i64* [[DOTCAPTURE_EXPR_2]], align 8 // CHECK1-NEXT: store i64 1, i64* [[I]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load double, double* [[DOTCAPTURE_EXPR_1]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = fcmp olt double 1.000000e+00, [[TMP2]] +// CHECK1-NEXT: [[TMP5:%.*]] = load double, double* [[DOTCAPTURE_EXPR_1]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = fcmp olt double 1.000000e+00, [[TMP5]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK1-NEXT: store i64 [[TMP3]], i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK1-NEXT: store i64 [[TMP6]], i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK1-NEXT: [[CONV6:%.*]] = sext i8 [[TMP4]] to i64 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 33, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 [[CONV6]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[CONV5:%.*]] = sext i8 [[TMP7]] to i64 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 33, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 [[CONV5]]) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp ugt i64 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp ugt i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[ADD8:%.*]] = add i64 [[TMP13]], 1 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp ult i64 [[TMP12]], [[ADD8]] -// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP14]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[ADD7:%.*]] = add i64 [[TMP16]], 1 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp ult i64 [[TMP15]], [[ADD7]] +// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[ADD10:%.*]] = add i64 [[TMP15]], 1 -// CHECK1-NEXT: [[CMP11:%.*]] = icmp ult i64 [[TMP14]], [[ADD10]] -// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[ADD9:%.*]] = add i64 [[TMP18]], 1 +// CHECK1-NEXT: [[CMP10:%.*]] = icmp ult i64 [[TMP17]], [[ADD9]] +// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 1 -// CHECK1-NEXT: [[ADD12:%.*]] = add i64 1, [[MUL]] -// CHECK1-NEXT: store i64 [[ADD12]], i64* [[I5]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP19]], 1 +// CHECK1-NEXT: [[ADD11:%.*]] = add i64 1, [[MUL]] +// CHECK1-NEXT: store i64 [[ADD11]], i64* [[I4]], align 8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[ADD13:%.*]] = add i64 [[TMP17]], 1 -// CHECK1-NEXT: store i64 [[ADD13]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD12:%.*]] = add i64 [[TMP20]], 1 +// CHECK1-NEXT: store i64 [[ADD12]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK1-NEXT: [[ADD14:%.*]] = add i64 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK1-NEXT: [[ADD15:%.*]] = add i64 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD13:%.*]] = add i64 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: store i64 [[ADD13]], i64* [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD14:%.*]] = add i64 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -374,23 +377,29 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK1-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK1-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK1-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -400,79 +409,81 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK1-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP12]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load float*, float** [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP15]], i64 [[IDXPROM2]] -// CHECK1-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX3]], align 4 -// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK1-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM5]] -// CHECK1-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX6]], align 4 -// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK1-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM8]] +// CHECK1-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM2]] +// CHECK1-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK1-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM5]] +// CHECK1-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX6]], align 4 +// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK1-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP26]], i64 [[IDXPROM8]] // CHECK1-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -483,23 +494,29 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK1-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK1-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK1-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..2 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -509,79 +526,81 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK1-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK1-NEXT: store i32 [[SUB]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP12]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load float*, float** [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP15]], i64 [[IDXPROM2]] -// CHECK1-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX3]], align 4 -// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK1-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM5]] -// CHECK1-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX6]], align 4 -// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK1-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM8]] +// CHECK1-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM2]] +// CHECK1-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK1-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM5]] +// CHECK1-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX6]], align 4 +// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK1-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP26]], i64 [[IDXPROM8]] // CHECK1-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -592,23 +611,29 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK1-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK1-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK1-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -618,96 +643,98 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK1-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 16908288, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK1-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM3]] -// CHECK1-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4 -// CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK1-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM6]] -// CHECK1-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4 -// CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK1-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM9]] +// CHECK1-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[IDXPROM3]] +// CHECK1-NEXT: [[TMP24:%.*]] = load float, float* [[ARRAYIDX4]], align 4 +// CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK1-NEXT: [[TMP25:%.*]] = load float*, float** [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP25]], i64 [[IDXPROM6]] +// CHECK1-NEXT: [[TMP27:%.*]] = load float, float* [[ARRAYIDX7]], align 4 +// CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP27]] +// CHECK1-NEXT: [[TMP28:%.*]] = load float*, float** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP29]] to i64 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP28]], i64 [[IDXPROM9]] // CHECK1-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP30]], 1 // CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -718,23 +745,29 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK1-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK1-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK1-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..4 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -744,65 +777,67 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK1-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 // CHECK1-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 16908287, i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 1073741859, i64 0, i64 16908287, i64 1, i64 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 1073741859, i64 0, i64 16908287, i64 1, i64 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP12]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !5 -// CHECK1-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[ADD:%.*]] = add i64 [[TMP14]], 1 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP13]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP15]], 127 // CHECK1-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]] // CHECK1-NEXT: store i64 [[ADD1]], i64* [[I]], align 8, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP11:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[TMP12]] -// CHECK1-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[TMP15]] -// CHECK1-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]] -// CHECK1-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[TMP18]] -// CHECK1-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]] -// CHECK1-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[TMP21]] +// CHECK1-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP4]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[TMP17]] +// CHECK1-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP6]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[TMP20]] +// CHECK1-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[MUL3:%.*]] = fmul float [[TMP18]], [[TMP21]] +// CHECK1-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP8]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[TMP23]] +// CHECK1-NEXT: [[TMP24:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP24]] +// CHECK1-NEXT: [[TMP25:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP25]], i64 [[TMP26]] // CHECK1-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK1-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[ADD7:%.*]] = add i64 [[TMP27]], 1 // CHECK1-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -820,23 +855,29 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK1-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK1-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK1-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..5 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -846,65 +887,67 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK1-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 // CHECK1-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 16908287, i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 1073741860, i64 0, i64 16908287, i64 1, i64 7) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 1073741860, i64 0, i64 16908287, i64 1, i64 7) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP12]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[ADD:%.*]] = add i64 [[TMP14]], 1 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP13]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP15]], 127 // CHECK1-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]] // CHECK1-NEXT: store i64 [[ADD1]], i64* [[I]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP11:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[TMP12]] -// CHECK1-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[TMP15]] -// CHECK1-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]] -// CHECK1-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[TMP18]] -// CHECK1-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]] -// CHECK1-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[TMP21]] +// CHECK1-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP4]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[TMP17]] +// CHECK1-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP6]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[TMP20]] +// CHECK1-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[MUL3:%.*]] = fmul float [[TMP18]], [[TMP21]] +// CHECK1-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP8]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[TMP23]] +// CHECK1-NEXT: [[TMP24:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP24]] +// CHECK1-NEXT: [[TMP25:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP25]], i64 [[TMP26]] // CHECK1-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[ADD7:%.*]] = add i64 [[TMP27]], 1 // CHECK1-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -924,26 +967,33 @@ // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 // CHECK1-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[Y:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK1-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK1-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK1-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[X]], align 4 // CHECK1-NEXT: store i32 0, i32* [[Y]], align 4 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[Y]], float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[Y]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store float** [[A_ADDR]], float*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store float** [[B_ADDR]], float*** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store float** [[C_ADDR]], float*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store float** [[D_ADDR]], float*** [[TMP4]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[Y:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -959,21 +1009,23 @@ // CHECK1-NEXT: [[X8:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[Y]], i32** [[Y_ADDR]], align 8 -// CHECK1-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK1-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[Y_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load float**, float*** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 +// CHECK1-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load float**, float*** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[TMP11]] to i8 // CHECK1-NEXT: store i8 [[CONV]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP6:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i8 [[TMP6]] to i32 +// CHECK1-NEXT: [[TMP12:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[CONV3:%.*]] = sext i8 [[TMP12]] to i32 // CHECK1-NEXT: [[SUB:%.*]] = sub i32 57, [[CONV3]] // CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB]], 1 // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 @@ -981,85 +1033,85 @@ // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV4]], 11 // CHECK1-NEXT: [[SUB5:%.*]] = sub nsw i64 [[MUL]], 1 // CHECK1-NEXT: store i64 [[SUB5]], i64* [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: store i8 [[TMP7]], i8* [[I]], align 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: store i8 [[TMP13]], i8* [[I]], align 1 // CHECK1-NEXT: store i32 11, i32* [[X]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[CONV6:%.*]] = sext i8 [[TMP8]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[CONV6:%.*]] = sext i8 [[TMP14]] to i32 // CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[CONV6]], 57 // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 1073741862, i64 0, i64 [[TMP10]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], i32 1073741862, i64 0, i64 [[TMP16]], i64 1, i64 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB2]], i32 [[TMP14]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP21]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP16]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP22]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !11 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i64 [[TMP23]], [[TMP24]] // CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !11 -// CHECK1-NEXT: [[CONV10:%.*]] = sext i8 [[TMP19]] to i64 -// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 -// CHECK1-NEXT: [[DIV11:%.*]] = sdiv i64 [[TMP20]], 11 +// CHECK1-NEXT: [[TMP25:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !11 +// CHECK1-NEXT: [[CONV10:%.*]] = sext i8 [[TMP25]] to i64 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[DIV11:%.*]] = sdiv i64 [[TMP26]], 11 // CHECK1-NEXT: [[MUL12:%.*]] = mul nsw i64 [[DIV11]], 1 // CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[CONV10]], [[MUL12]] // CHECK1-NEXT: [[CONV14:%.*]] = trunc i64 [[ADD13]] to i8 // CHECK1-NEXT: store i8 [[CONV14]], i8* [[I7]], align 1, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 -// CHECK1-NEXT: [[DIV15:%.*]] = sdiv i64 [[TMP22]], 11 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[DIV15:%.*]] = sdiv i64 [[TMP28]], 11 // CHECK1-NEXT: [[MUL16:%.*]] = mul nsw i64 [[DIV15]], 11 -// CHECK1-NEXT: [[SUB17:%.*]] = sub nsw i64 [[TMP21]], [[MUL16]] +// CHECK1-NEXT: [[SUB17:%.*]] = sub nsw i64 [[TMP27]], [[MUL16]] // CHECK1-NEXT: [[MUL18:%.*]] = mul nsw i64 [[SUB17]], 1 // CHECK1-NEXT: [[SUB19:%.*]] = sub nsw i64 11, [[MUL18]] // CHECK1-NEXT: [[CONV20:%.*]] = trunc i64 [[SUB19]] to i32 // CHECK1-NEXT: store i32 [[CONV20]], i32* [[X8]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP24:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i8 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP27:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 -// CHECK1-NEXT: [[IDXPROM21:%.*]] = sext i8 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds float, float* [[TMP26]], i64 [[IDXPROM21]] -// CHECK1-NEXT: [[TMP28:%.*]] = load float, float* [[ARRAYIDX22]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[MUL23:%.*]] = fmul float [[TMP25]], [[TMP28]] -// CHECK1-NEXT: [[TMP29:%.*]] = load float*, float** [[TMP4]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP29:%.*]] = load float*, float** [[TMP6]], align 8, !llvm.access.group !11 // CHECK1-NEXT: [[TMP30:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 -// CHECK1-NEXT: [[IDXPROM24:%.*]] = sext i8 [[TMP30]] to i64 -// CHECK1-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds float, float* [[TMP29]], i64 [[IDXPROM24]] -// CHECK1-NEXT: [[TMP31:%.*]] = load float, float* [[ARRAYIDX25]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[MUL26:%.*]] = fmul float [[MUL23]], [[TMP31]] -// CHECK1-NEXT: [[TMP32:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i8 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP29]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP31:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP32:%.*]] = load float*, float** [[TMP8]], align 8, !llvm.access.group !11 // CHECK1-NEXT: [[TMP33:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 -// CHECK1-NEXT: [[IDXPROM27:%.*]] = sext i8 [[TMP33]] to i64 -// CHECK1-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds float, float* [[TMP32]], i64 [[IDXPROM27]] +// CHECK1-NEXT: [[IDXPROM21:%.*]] = sext i8 [[TMP33]] to i64 +// CHECK1-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds float, float* [[TMP32]], i64 [[IDXPROM21]] +// CHECK1-NEXT: [[TMP34:%.*]] = load float, float* [[ARRAYIDX22]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[MUL23:%.*]] = fmul float [[TMP31]], [[TMP34]] +// CHECK1-NEXT: [[TMP35:%.*]] = load float*, float** [[TMP10]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP36:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 +// CHECK1-NEXT: [[IDXPROM24:%.*]] = sext i8 [[TMP36]] to i64 +// CHECK1-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds float, float* [[TMP35]], i64 [[IDXPROM24]] +// CHECK1-NEXT: [[TMP37:%.*]] = load float, float* [[ARRAYIDX25]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[MUL26:%.*]] = fmul float [[MUL23]], [[TMP37]] +// CHECK1-NEXT: [[TMP38:%.*]] = load float*, float** [[TMP4]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP39:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 +// CHECK1-NEXT: [[IDXPROM27:%.*]] = sext i8 [[TMP39]] to i64 +// CHECK1-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds float, float* [[TMP38]], i64 [[IDXPROM27]] // CHECK1-NEXT: store float [[MUL26]], float* [[ARRAYIDX28]], align 4, !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 -// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i64 [[TMP34]], 1 +// CHECK1-NEXT: [[TMP40:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i64 [[TMP40]], 1 // CHECK1-NEXT: store i64 [[ADD29]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -1080,24 +1132,30 @@ // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 // CHECK1-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK1-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK1-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK1-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[X]], align 4 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..7 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1109,78 +1167,80 @@ // CHECK1-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK1-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 199, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 1073741861, i32 0, i32 199, i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 1073741861, i32 0, i32 199, i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 20 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP15]], 20 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 48, [[MUL]] // CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i8 // CHECK1-NEXT: store i8 [[CONV]], i8* [[I]], align 1, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP12]], 20 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP17]], 20 // CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 20 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL3]] +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], [[MUL3]] // CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 -10, [[MUL4]] // CHECK1-NEXT: store i32 [[ADD5]], i32* [[X]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP14:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 -// CHECK1-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP17:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 -// CHECK1-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[IDXPROM6]] -// CHECK1-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK1-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP20:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 -// CHECK1-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP20]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[IDXPROM9]] -// CHECK1-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP21]] -// CHECK1-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP23:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 -// CHECK1-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP23]] to i64 -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[IDXPROM12]] +// CHECK1-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP4]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP19:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 +// CHECK1-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP19]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP6]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP22:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 +// CHECK1-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM6]] +// CHECK1-NEXT: [[TMP23:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[TMP20]], [[TMP23]] +// CHECK1-NEXT: [[TMP24:%.*]] = load float*, float** [[TMP8]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP25:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 +// CHECK1-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP25]] to i64 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP24]], i64 [[IDXPROM9]] +// CHECK1-NEXT: [[TMP26:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP26]] +// CHECK1-NEXT: [[TMP27:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP28:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 +// CHECK1-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP28]] to i64 +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[TMP27]], i64 [[IDXPROM12]] // CHECK1-NEXT: store float [[MUL11]], float* [[ARRAYIDX13]], align 4, !llvm.access.group !14 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -1205,7 +1265,7 @@ // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK1-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 @@ -1214,24 +1274,26 @@ // CHECK1-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8 // CHECK1-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 16 // CHECK1-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, i64, i64)* @.omp_outlined..8 to void (i32*, i32*, ...)*), float** [[A_ADDR]], i64 [[TMP1]], i64 [[TMP4]]) -// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP5]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store float** [[A_ADDR]], float*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP7]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[N:%.*]]) #[[ATTR1]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1243,107 +1305,110 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* +// CHECK1-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 16908288, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave() -// CHECK1-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA1:%.*]] = alloca float, i64 [[TMP1]], align 16 -// CHECK1-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK1-NEXT: [[TMP7:%.*]] = call i8* @llvm.stacksave() +// CHECK1-NEXT: store i8* [[TMP7]], i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP4]], align 16 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], 16908288 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP10]], 16908288 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] // CHECK1: omp.dispatch.cleanup: // CHECK1-NEXT: br label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP12]], 127 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP17]], 127 // CHECK1-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: [[CALL:%.*]] = invoke noundef i32 @_Z3foov() // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: -// CHECK1-NEXT: [[CONV4:%.*]] = sitofp i32 [[CALL]] to float -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[VLA1]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = fadd float [[CONV4]], [[TMP14]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV6:%.*]] = sitofp i32 [[TMP15]] to float -// CHECK1-NEXT: [[ADD7:%.*]] = fadd float [[ADD5]], [[CONV6]] -// CHECK1-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[IDXPROM8]] -// CHECK1-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX9]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = fadd float [[TMP18]], [[ADD7]] -// CHECK1-NEXT: store float [[ADD10]], float* [[ARRAYIDX9]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[CALL]] to float +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[VLA]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = fadd float [[CONV]], [[TMP19]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[N]], align 4 +// CHECK1-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP20]] to float +// CHECK1-NEXT: [[ADD5:%.*]] = fadd float [[ADD3]], [[CONV4]] +// CHECK1-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM6]] +// CHECK1-NEXT: [[TMP23:%.*]] = load float, float* [[ARRAYIDX7]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = fadd float [[TMP23]], [[ADD5]] +// CHECK1-NEXT: store float [[ADD8]], float* [[ARRAYIDX7]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP24]], 1 +// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP22]], [[TMP23]] -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP24:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP24]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]]) +// CHECK1-NEXT: [[TMP29:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP29]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP25:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP30:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP26:%.*]] = extractvalue { i8*, i32 } [[TMP25]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP26]]) #[[ATTR7:[0-9]+]] +// CHECK1-NEXT: [[TMP31:%.*]] = extractvalue { i8*, i32 } [[TMP30]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP31]]) #[[ATTR7:[0-9]+]] // CHECK1-NEXT: unreachable // // @@ -1359,25 +1424,25 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK2-NEXT: store double 5.000000e+00, double* [[A]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load double, double* [[A]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = fptosi double [[TMP0]] to i8 // CHECK2-NEXT: store i8 [[CONV]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK2-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* -// CHECK2-NEXT: store i8 [[TMP1]], i8* [[CONV1]], align 1 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP2]]) +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-NEXT: store i8 [[TMP2]], i8* [[TMP1]], align 1 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca double, align 8 @@ -1388,94 +1453,97 @@ // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[A:%.*]] = alloca double, align 8 -// CHECK2-NEXT: [[I5:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[I4:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK2-NEXT: [[TMP0:%.*]] = load double, double* undef, align 8 -// CHECK2-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP0]] +// CHECK2-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK2-NEXT: store i8 [[TMP2]], i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-NEXT: [[TMP3:%.*]] = load double, double* undef, align 8 +// CHECK2-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP3]] // CHECK2-NEXT: store double [[ADD]], double* [[DOTCAPTURE_EXPR_1]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load double, double* [[DOTCAPTURE_EXPR_1]], align 8 -// CHECK2-NEXT: [[SUB:%.*]] = fsub double [[TMP1]], 1.000000e+00 +// CHECK2-NEXT: [[TMP4:%.*]] = load double, double* [[DOTCAPTURE_EXPR_1]], align 8 +// CHECK2-NEXT: [[SUB:%.*]] = fsub double [[TMP4]], 1.000000e+00 // CHECK2-NEXT: [[DIV:%.*]] = fdiv double [[SUB]], 1.000000e+00 -// CHECK2-NEXT: [[CONV3:%.*]] = fptoui double [[DIV]] to i64 -// CHECK2-NEXT: [[SUB4:%.*]] = sub i64 [[CONV3]], 1 -// CHECK2-NEXT: store i64 [[SUB4]], i64* [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = fptoui double [[DIV]] to i64 +// CHECK2-NEXT: [[SUB3:%.*]] = sub i64 [[CONV]], 1 +// CHECK2-NEXT: store i64 [[SUB3]], i64* [[DOTCAPTURE_EXPR_2]], align 8 // CHECK2-NEXT: store i64 1, i64* [[I]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load double, double* [[DOTCAPTURE_EXPR_1]], align 8 -// CHECK2-NEXT: [[CMP:%.*]] = fcmp olt double 1.000000e+00, [[TMP2]] +// CHECK2-NEXT: [[TMP5:%.*]] = load double, double* [[DOTCAPTURE_EXPR_1]], align 8 +// CHECK2-NEXT: [[CMP:%.*]] = fcmp olt double 1.000000e+00, [[TMP5]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK2-NEXT: store i64 [[TMP3]], i64* [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK2-NEXT: store i64 [[TMP6]], i64* [[DOTOMP_UB]], align 8 // CHECK2-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK2-NEXT: [[CONV6:%.*]] = sext i8 [[TMP4]] to i64 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 33, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 [[CONV6]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-NEXT: [[CONV5:%.*]] = sext i8 [[TMP7]] to i64 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 33, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 [[CONV5]]) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK2-NEXT: [[CMP7:%.*]] = icmp ugt i64 [[TMP7]], [[TMP8]] -// CHECK2-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp ugt i64 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK2-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK2-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_IV]], align 8 -// CHECK2-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK2-NEXT: [[ADD8:%.*]] = add i64 [[TMP13]], 1 -// CHECK2-NEXT: [[CMP9:%.*]] = icmp ult i64 [[TMP12]], [[ADD8]] -// CHECK2-NEXT: br i1 [[CMP9]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP14]], i64* [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[ADD7:%.*]] = add i64 [[TMP16]], 1 +// CHECK2-NEXT: [[CMP8:%.*]] = icmp ult i64 [[TMP15]], [[ADD7]] +// CHECK2-NEXT: br i1 [[CMP8]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK2-NEXT: [[ADD10:%.*]] = add i64 [[TMP15]], 1 -// CHECK2-NEXT: [[CMP11:%.*]] = icmp ult i64 [[TMP14]], [[ADD10]] -// CHECK2-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[ADD9:%.*]] = add i64 [[TMP18]], 1 +// CHECK2-NEXT: [[CMP10:%.*]] = icmp ult i64 [[TMP17]], [[ADD9]] +// CHECK2-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK2-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 1 -// CHECK2-NEXT: [[ADD12:%.*]] = add i64 1, [[MUL]] -// CHECK2-NEXT: store i64 [[ADD12]], i64* [[I5]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[MUL:%.*]] = mul i64 [[TMP19]], 1 +// CHECK2-NEXT: [[ADD11:%.*]] = add i64 1, [[MUL]] +// CHECK2-NEXT: store i64 [[ADD11]], i64* [[I4]], align 8 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK2-NEXT: [[ADD13:%.*]] = add i64 [[TMP17]], 1 -// CHECK2-NEXT: store i64 [[ADD13]], i64* [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[ADD12:%.*]] = add i64 [[TMP20]], 1 +// CHECK2-NEXT: store i64 [[ADD12]], i64* [[DOTOMP_IV]], align 8 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: -// CHECK2-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK2-NEXT: [[ADD14:%.*]] = add i64 [[TMP18]], [[TMP19]] -// CHECK2-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_LB]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK2-NEXT: [[ADD15:%.*]] = add i64 [[TMP20]], [[TMP21]] -// CHECK2-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK2-NEXT: [[ADD13:%.*]] = add i64 [[TMP21]], [[TMP22]] +// CHECK2-NEXT: store i64 [[ADD13]], i64* [[DOTOMP_LB]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK2-NEXT: [[ADD14:%.*]] = add i64 [[TMP23]], [[TMP24]] +// CHECK2-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_UB]], align 8 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK2-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void @@ -1488,23 +1556,29 @@ // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK2-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK2-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK2-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK2-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK2-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1514,79 +1588,81 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK2-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK2-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK2-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 +// CHECK2-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP12]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load float*, float** [[TMP2]], align 8 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP15]], i64 [[IDXPROM2]] -// CHECK2-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX3]], align 4 -// CHECK2-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK2-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP3]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM5]] -// CHECK2-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX6]], align 4 -// CHECK2-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK2-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 8 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM8]] +// CHECK2-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP4]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP6]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM2]] +// CHECK2-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK2-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK2-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP8]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM5]] +// CHECK2-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX6]], align 4 +// CHECK2-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK2-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP26]], i64 [[IDXPROM8]] // CHECK2-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK2-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK2-NEXT: ret void // // @@ -1597,23 +1673,29 @@ // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK2-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK2-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK2-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK2-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK2-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..2 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1623,79 +1705,81 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK2-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK2-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK2-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 +// CHECK2-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK2-NEXT: store i32 [[SUB]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP12]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load float*, float** [[TMP2]], align 8 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP15]], i64 [[IDXPROM2]] -// CHECK2-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX3]], align 4 -// CHECK2-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK2-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP3]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM5]] -// CHECK2-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX6]], align 4 -// CHECK2-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK2-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 8 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM8]] +// CHECK2-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP4]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP6]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM2]] +// CHECK2-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK2-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK2-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP8]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM5]] +// CHECK2-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX6]], align 4 +// CHECK2-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK2-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP26]], i64 [[IDXPROM8]] // CHECK2-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK2-NEXT: ret void // // @@ -1706,23 +1790,29 @@ // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK2-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK2-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK2-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK2-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK2-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1732,96 +1822,98 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK2-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK2-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK2-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 +// CHECK2-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 16908288, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK2-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK2-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM3]] -// CHECK2-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4 -// CHECK2-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK2-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM6]] -// CHECK2-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4 -// CHECK2-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK2-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK2-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM9]] +// CHECK2-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP4]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP6]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[IDXPROM3]] +// CHECK2-NEXT: [[TMP24:%.*]] = load float, float* [[ARRAYIDX4]], align 4 +// CHECK2-NEXT: [[MUL5:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK2-NEXT: [[TMP25:%.*]] = load float*, float** [[TMP8]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP25]], i64 [[IDXPROM6]] +// CHECK2-NEXT: [[TMP27:%.*]] = load float, float* [[ARRAYIDX7]], align 4 +// CHECK2-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP27]] +// CHECK2-NEXT: [[TMP28:%.*]] = load float*, float** [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP29]] to i64 +// CHECK2-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP28]], i64 [[IDXPROM9]] // CHECK2-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD11:%.*]] = add i32 [[TMP30]], 1 // CHECK2-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK2-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD13:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD13:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK2-NEXT: ret void // // @@ -1832,23 +1924,29 @@ // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK2-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK2-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK2-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK2-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK2-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..4 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -1858,65 +1956,67 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK2-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK2-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK2-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 +// CHECK2-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 // CHECK2-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK2-NEXT: store i64 16908287, i64* [[DOTOMP_UB]], align 8 // CHECK2-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK2-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 35, i64 0, i64 16908287, i64 1, i64 1) +// CHECK2-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 35, i64 0, i64 16908287, i64 1, i64 1) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) -// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK2-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) +// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK2-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP12]], i64* [[DOTOMP_IV]], align 8 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK2-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !5 -// CHECK2-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: [[ADD:%.*]] = add i64 [[TMP14]], 1 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP13]], [[ADD]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK2-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: [[MUL:%.*]] = mul i64 [[TMP15]], 127 // CHECK2-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]] // CHECK2-NEXT: store i64 [[ADD1]], i64* [[I]], align 8, !llvm.access.group !5 -// CHECK2-NEXT: [[TMP11:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !5 -// CHECK2-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[TMP12]] -// CHECK2-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !5 -// CHECK2-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !5 -// CHECK2-NEXT: [[TMP15:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 -// CHECK2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[TMP15]] -// CHECK2-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !5 -// CHECK2-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]] -// CHECK2-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !5 -// CHECK2-NEXT: [[TMP18:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 -// CHECK2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[TMP18]] -// CHECK2-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !5 -// CHECK2-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]] -// CHECK2-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !5 -// CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 -// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[TMP21]] +// CHECK2-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP4]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP17:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[TMP17]] +// CHECK2-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP6]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP20:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[TMP20]] +// CHECK2-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[MUL3:%.*]] = fmul float [[TMP18]], [[TMP21]] +// CHECK2-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP8]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[TMP23]] +// CHECK2-NEXT: [[TMP24:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP24]] +// CHECK2-NEXT: [[TMP25:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP26:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP25]], i64 [[TMP26]] // CHECK2-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !5 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK2-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1 +// CHECK2-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: [[ADD7:%.*]] = add i64 [[TMP27]], 1 // CHECK2-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK2: omp.inner.for.end: @@ -1934,23 +2034,29 @@ // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK2-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK2-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK2-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK2-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK2-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..5 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -1960,65 +2066,67 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK2-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK2-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK2-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 +// CHECK2-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 // CHECK2-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK2-NEXT: store i64 16908287, i64* [[DOTOMP_UB]], align 8 // CHECK2-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK2-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 36, i64 0, i64 16908287, i64 1, i64 7) +// CHECK2-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 36, i64 0, i64 16908287, i64 1, i64 7) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) -// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK2-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) +// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK2-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP12]], i64* [[DOTOMP_IV]], align 8 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 -// CHECK2-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !8 -// CHECK2-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: [[ADD:%.*]] = add i64 [[TMP14]], 1 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP13]], [[ADD]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 -// CHECK2-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: [[MUL:%.*]] = mul i64 [[TMP15]], 127 // CHECK2-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]] // CHECK2-NEXT: store i64 [[ADD1]], i64* [[I]], align 8, !llvm.access.group !8 -// CHECK2-NEXT: [[TMP11:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !8 -// CHECK2-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[TMP12]] -// CHECK2-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !8 -// CHECK2-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !8 -// CHECK2-NEXT: [[TMP15:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 -// CHECK2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[TMP15]] -// CHECK2-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !8 -// CHECK2-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]] -// CHECK2-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !8 -// CHECK2-NEXT: [[TMP18:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 -// CHECK2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[TMP18]] -// CHECK2-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !8 -// CHECK2-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]] -// CHECK2-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !8 -// CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 -// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[TMP21]] +// CHECK2-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP4]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP17:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[TMP17]] +// CHECK2-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP6]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP20:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[TMP20]] +// CHECK2-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: [[MUL3:%.*]] = fmul float [[TMP18]], [[TMP21]] +// CHECK2-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP8]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[TMP23]] +// CHECK2-NEXT: [[TMP24:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP24]] +// CHECK2-NEXT: [[TMP25:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP26:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP25]], i64 [[TMP26]] // CHECK2-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !8 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 -// CHECK2-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1 +// CHECK2-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: [[ADD7:%.*]] = add i64 [[TMP27]], 1 // CHECK2-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK2: omp.inner.for.end: @@ -2038,26 +2146,33 @@ // CHECK2-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 // CHECK2-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[Y:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK2-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK2-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK2-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK2-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[X]], align 4 // CHECK2-NEXT: store i32 0, i32* [[Y]], align 4 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[Y]], float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store i32* [[Y]], i32** [[TMP0]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store float** [[A_ADDR]], float*** [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store float** [[B_ADDR]], float*** [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store float** [[C_ADDR]], float*** [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK2-NEXT: store float** [[D_ADDR]], float*** [[TMP4]], align 8 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[Y:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[Y_ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2073,21 +2188,23 @@ // CHECK2-NEXT: [[X8:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[Y]], i32** [[Y_ADDR]], align 8 -// CHECK2-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK2-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK2-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK2-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[Y_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load float**, float*** [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 +// CHECK2-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load float**, float*** [[TMP9]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i32 [[TMP11]] to i8 // CHECK2-NEXT: store i8 [[CONV]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK2-NEXT: [[TMP6:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK2-NEXT: [[CONV3:%.*]] = sext i8 [[TMP6]] to i32 +// CHECK2-NEXT: [[TMP12:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-NEXT: [[CONV3:%.*]] = sext i8 [[TMP12]] to i32 // CHECK2-NEXT: [[SUB:%.*]] = sub i32 57, [[CONV3]] // CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB]], 1 // CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 @@ -2095,85 +2212,85 @@ // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV4]], 11 // CHECK2-NEXT: [[SUB5:%.*]] = sub nsw i64 [[MUL]], 1 // CHECK2-NEXT: store i64 [[SUB5]], i64* [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK2-NEXT: store i8 [[TMP7]], i8* [[I]], align 1 +// CHECK2-NEXT: [[TMP13:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-NEXT: store i8 [[TMP13]], i8* [[I]], align 1 // CHECK2-NEXT: store i32 11, i32* [[X]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK2-NEXT: [[CONV6:%.*]] = sext i8 [[TMP8]] to i32 +// CHECK2-NEXT: [[TMP14:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-NEXT: [[CONV6:%.*]] = sext i8 [[TMP14]] to i32 // CHECK2-NEXT: [[CMP:%.*]] = icmp sle i32 [[CONV6]], 57 // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK2-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTOMP_UB]], align 8 // CHECK2-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK2-NEXT: call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 38, i64 0, i64 [[TMP10]], i64 1, i64 1) +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK2-NEXT: call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], i32 38, i64 0, i64 [[TMP16]], i64 1, i64 1) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB2]], i32 [[TMP14]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) -// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) +// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP21]], 0 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: -// CHECK2-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK2-NEXT: store i64 [[TMP16]], i64* [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP22]], i64* [[DOTOMP_IV]], align 8 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 -// CHECK2-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !11 -// CHECK2-NEXT: [[CMP9:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !11 +// CHECK2-NEXT: [[CMP9:%.*]] = icmp sle i64 [[TMP23]], [[TMP24]] // CHECK2-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP19:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !11 -// CHECK2-NEXT: [[CONV10:%.*]] = sext i8 [[TMP19]] to i64 -// CHECK2-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 -// CHECK2-NEXT: [[DIV11:%.*]] = sdiv i64 [[TMP20]], 11 +// CHECK2-NEXT: [[TMP25:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !11 +// CHECK2-NEXT: [[CONV10:%.*]] = sext i8 [[TMP25]] to i64 +// CHECK2-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 +// CHECK2-NEXT: [[DIV11:%.*]] = sdiv i64 [[TMP26]], 11 // CHECK2-NEXT: [[MUL12:%.*]] = mul nsw i64 [[DIV11]], 1 // CHECK2-NEXT: [[ADD13:%.*]] = add nsw i64 [[CONV10]], [[MUL12]] // CHECK2-NEXT: [[CONV14:%.*]] = trunc i64 [[ADD13]] to i8 // CHECK2-NEXT: store i8 [[CONV14]], i8* [[I7]], align 1, !llvm.access.group !11 -// CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 -// CHECK2-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 -// CHECK2-NEXT: [[DIV15:%.*]] = sdiv i64 [[TMP22]], 11 +// CHECK2-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 +// CHECK2-NEXT: [[DIV15:%.*]] = sdiv i64 [[TMP28]], 11 // CHECK2-NEXT: [[MUL16:%.*]] = mul nsw i64 [[DIV15]], 11 -// CHECK2-NEXT: [[SUB17:%.*]] = sub nsw i64 [[TMP21]], [[MUL16]] +// CHECK2-NEXT: [[SUB17:%.*]] = sub nsw i64 [[TMP27]], [[MUL16]] // CHECK2-NEXT: [[MUL18:%.*]] = mul nsw i64 [[SUB17]], 1 // CHECK2-NEXT: [[SUB19:%.*]] = sub nsw i64 11, [[MUL18]] // CHECK2-NEXT: [[CONV20:%.*]] = trunc i64 [[SUB19]] to i32 // CHECK2-NEXT: store i32 [[CONV20]], i32* [[X8]], align 4, !llvm.access.group !11 -// CHECK2-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !11 -// CHECK2-NEXT: [[TMP24:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i8 [[TMP24]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !11 -// CHECK2-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !11 -// CHECK2-NEXT: [[TMP27:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 -// CHECK2-NEXT: [[IDXPROM21:%.*]] = sext i8 [[TMP27]] to i64 -// CHECK2-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds float, float* [[TMP26]], i64 [[IDXPROM21]] -// CHECK2-NEXT: [[TMP28:%.*]] = load float, float* [[ARRAYIDX22]], align 4, !llvm.access.group !11 -// CHECK2-NEXT: [[MUL23:%.*]] = fmul float [[TMP25]], [[TMP28]] -// CHECK2-NEXT: [[TMP29:%.*]] = load float*, float** [[TMP4]], align 8, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP29:%.*]] = load float*, float** [[TMP6]], align 8, !llvm.access.group !11 // CHECK2-NEXT: [[TMP30:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 -// CHECK2-NEXT: [[IDXPROM24:%.*]] = sext i8 [[TMP30]] to i64 -// CHECK2-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds float, float* [[TMP29]], i64 [[IDXPROM24]] -// CHECK2-NEXT: [[TMP31:%.*]] = load float, float* [[ARRAYIDX25]], align 4, !llvm.access.group !11 -// CHECK2-NEXT: [[MUL26:%.*]] = fmul float [[MUL23]], [[TMP31]] -// CHECK2-NEXT: [[TMP32:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !11 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i8 [[TMP30]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP29]], i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP31:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP32:%.*]] = load float*, float** [[TMP8]], align 8, !llvm.access.group !11 // CHECK2-NEXT: [[TMP33:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 -// CHECK2-NEXT: [[IDXPROM27:%.*]] = sext i8 [[TMP33]] to i64 -// CHECK2-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds float, float* [[TMP32]], i64 [[IDXPROM27]] +// CHECK2-NEXT: [[IDXPROM21:%.*]] = sext i8 [[TMP33]] to i64 +// CHECK2-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds float, float* [[TMP32]], i64 [[IDXPROM21]] +// CHECK2-NEXT: [[TMP34:%.*]] = load float, float* [[ARRAYIDX22]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[MUL23:%.*]] = fmul float [[TMP31]], [[TMP34]] +// CHECK2-NEXT: [[TMP35:%.*]] = load float*, float** [[TMP10]], align 8, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP36:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 +// CHECK2-NEXT: [[IDXPROM24:%.*]] = sext i8 [[TMP36]] to i64 +// CHECK2-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds float, float* [[TMP35]], i64 [[IDXPROM24]] +// CHECK2-NEXT: [[TMP37:%.*]] = load float, float* [[ARRAYIDX25]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[MUL26:%.*]] = fmul float [[MUL23]], [[TMP37]] +// CHECK2-NEXT: [[TMP38:%.*]] = load float*, float** [[TMP4]], align 8, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP39:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 +// CHECK2-NEXT: [[IDXPROM27:%.*]] = sext i8 [[TMP39]] to i64 +// CHECK2-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds float, float* [[TMP38]], i64 [[IDXPROM27]] // CHECK2-NEXT: store float [[MUL26]], float* [[ARRAYIDX28]], align 4, !llvm.access.group !11 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 -// CHECK2-NEXT: [[ADD29:%.*]] = add nsw i64 [[TMP34]], 1 +// CHECK2-NEXT: [[TMP40:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 +// CHECK2-NEXT: [[ADD29:%.*]] = add nsw i64 [[TMP40]], 1 // CHECK2-NEXT: store i64 [[ADD29]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK2: omp.inner.for.end: @@ -2194,24 +2311,30 @@ // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK2-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 // CHECK2-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK2-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK2-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK2-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK2-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[X]], align 4 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..7 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2223,78 +2346,80 @@ // CHECK2-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK2-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK2-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK2-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 +// CHECK2-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 199, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK2-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 37, i32 0, i32 199, i32 1, i32 1) +// CHECK2-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 37, i32 0, i32 199, i32 1, i32 1) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK2-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 20 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP15]], 20 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 48, [[MUL]] // CHECK2-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i8 // CHECK2-NEXT: store i8 [[CONV]], i8* [[I]], align 1, !llvm.access.group !14 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK2-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP12]], 20 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP17]], 20 // CHECK2-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 20 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL3]] +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], [[MUL3]] // CHECK2-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 -10, [[MUL4]] // CHECK2-NEXT: store i32 [[ADD5]], i32* [[X]], align 4, !llvm.access.group !14 -// CHECK2-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !14 -// CHECK2-NEXT: [[TMP14:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 -// CHECK2-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP14]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !14 -// CHECK2-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !14 -// CHECK2-NEXT: [[TMP17:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 -// CHECK2-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP17]] to i64 -// CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[IDXPROM6]] -// CHECK2-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !14 -// CHECK2-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK2-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !14 -// CHECK2-NEXT: [[TMP20:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 -// CHECK2-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP20]] to i64 -// CHECK2-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[IDXPROM9]] -// CHECK2-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !14 -// CHECK2-NEXT: [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP21]] -// CHECK2-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !14 -// CHECK2-NEXT: [[TMP23:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 -// CHECK2-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP23]] to i64 -// CHECK2-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[IDXPROM12]] +// CHECK2-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP4]], align 8, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP19:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 +// CHECK2-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP19]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP6]], align 8, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP22:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 +// CHECK2-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP22]] to i64 +// CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM6]] +// CHECK2-NEXT: [[TMP23:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[MUL8:%.*]] = fmul float [[TMP20]], [[TMP23]] +// CHECK2-NEXT: [[TMP24:%.*]] = load float*, float** [[TMP8]], align 8, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP25:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 +// CHECK2-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP25]] to i64 +// CHECK2-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP24]], i64 [[IDXPROM9]] +// CHECK2-NEXT: [[TMP26:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP26]] +// CHECK2-NEXT: [[TMP27:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP28:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 +// CHECK2-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP28]] to i64 +// CHECK2-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[TMP27]], i64 [[IDXPROM12]] // CHECK2-NEXT: store float [[MUL11]], float* [[ARRAYIDX13]], align 4, !llvm.access.group !14 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK2-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK2: omp.inner.for.end: @@ -2319,7 +2444,7 @@ // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 // CHECK2-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK2-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 @@ -2328,24 +2453,26 @@ // CHECK2-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8 // CHECK2-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 16 // CHECK2-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, i64, i64)* @.omp_outlined..8 to void (i32*, i32*, ...)*), float** [[A_ADDR]], i64 [[TMP1]], i64 [[TMP4]]) -// CHECK2-NEXT: [[TMP5:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK2-NEXT: call void @llvm.stackrestore(i8* [[TMP5]]) +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store float** [[A_ADDR]], float*** [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store i64 [[TMP1]], i64* [[TMP4]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 8 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK2-NEXT: call void @llvm.stackrestore(i8* [[TMP7]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[N:%.*]]) #[[ATTR1]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2357,107 +2484,110 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* +// CHECK2-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK2-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 16908288, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave() -// CHECK2-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8 -// CHECK2-NEXT: [[VLA1:%.*]] = alloca float, i64 [[TMP1]], align 16 -// CHECK2-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK2-NEXT: [[TMP7:%.*]] = call i8* @llvm.stacksave() +// CHECK2-NEXT: store i8* [[TMP7]], i8** [[SAVED_STACK]], align 8 +// CHECK2-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP4]], align 16 +// CHECK2-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR0]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], 16908288 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP10]], 16908288 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP8]], [[TMP9]] -// CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP13]], [[TMP14]] +// CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] // CHECK2: omp.dispatch.cleanup: // CHECK2-NEXT: br label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP3:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] -// CHECK2-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP15]], [[TMP16]] +// CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP12]], 127 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP17]], 127 // CHECK2-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK2-NEXT: [[CALL:%.*]] = invoke noundef i32 @_Z3foov() // CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK2: invoke.cont: -// CHECK2-NEXT: [[CONV4:%.*]] = sitofp i32 [[CALL]] to float -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[VLA1]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[ADD5:%.*]] = fadd float [[CONV4]], [[TMP14]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK2-NEXT: [[CONV6:%.*]] = sitofp i32 [[TMP15]] to float -// CHECK2-NEXT: [[ADD7:%.*]] = fadd float [[ADD5]], [[CONV6]] -// CHECK2-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP0]], align 8 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM8:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[IDXPROM8]] -// CHECK2-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX9]], align 4 -// CHECK2-NEXT: [[ADD10:%.*]] = fadd float [[TMP18]], [[ADD7]] -// CHECK2-NEXT: store float [[ADD10]], float* [[ARRAYIDX9]], align 4 +// CHECK2-NEXT: [[CONV:%.*]] = sitofp i32 [[CALL]] to float +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[VLA]], i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[ADD3:%.*]] = fadd float [[CONV]], [[TMP19]] +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[N]], align 4 +// CHECK2-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP20]] to float +// CHECK2-NEXT: [[ADD5:%.*]] = fadd float [[ADD3]], [[CONV4]] +// CHECK2-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM6]] +// CHECK2-NEXT: [[TMP23:%.*]] = load float, float* [[ARRAYIDX7]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = fadd float [[TMP23]], [[ADD5]] +// CHECK2-NEXT: store float [[ADD8]], float* [[ARRAYIDX7]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD11:%.*]] = add i32 [[TMP19]], 1 -// CHECK2-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD9:%.*]] = add i32 [[TMP24]], 1 +// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[TMP20]], [[TMP21]] -// CHECK2-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD13:%.*]] = add i32 [[TMP22]], [[TMP23]] -// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD10:%.*]] = add i32 [[TMP25]], [[TMP26]] +// CHECK2-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD11:%.*]] = add i32 [[TMP27]], [[TMP28]] +// CHECK2-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK2-NEXT: [[TMP24:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK2-NEXT: call void @llvm.stackrestore(i8* [[TMP24]]) +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]]) +// CHECK2-NEXT: [[TMP29:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK2-NEXT: call void @llvm.stackrestore(i8* [[TMP29]]) // CHECK2-NEXT: ret void // CHECK2: terminate.lpad: -// CHECK2-NEXT: [[TMP25:%.*]] = landingpad { i8*, i32 } +// CHECK2-NEXT: [[TMP30:%.*]] = landingpad { i8*, i32 } // CHECK2-NEXT: catch i8* null -// CHECK2-NEXT: [[TMP26:%.*]] = extractvalue { i8*, i32 } [[TMP25]], 0 -// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP26]]) #[[ATTR7:[0-9]+]] +// CHECK2-NEXT: [[TMP31:%.*]] = extractvalue { i8*, i32 } [[TMP30]], 0 +// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP31]]) #[[ATTR7:[0-9]+]] // CHECK2-NEXT: unreachable // // @@ -2473,25 +2603,25 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK5-NEXT: store double 5.000000e+00, double* [[A]], align 8, !dbg [[DBG10:![0-9]+]] // CHECK5-NEXT: [[TMP0:%.*]] = load double, double* [[A]], align 8, !dbg [[DBG11:![0-9]+]] // CHECK5-NEXT: [[CONV:%.*]] = fptosi double [[TMP0]] to i8, !dbg [[DBG11]] // CHECK5-NEXT: store i8 [[CONV]], i8* [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG11]] -// CHECK5-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG11]] -// CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8*, !dbg [[DBG11]] -// CHECK5-NEXT: store i8 [[TMP1]], i8* [[CONV1]], align 1, !dbg [[DBG11]] -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !dbg [[DBG11]] -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP2]]), !dbg [[DBG11]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG11]] +// CHECK5-NEXT: [[TMP2:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG11]] +// CHECK5-NEXT: store i8 [[TMP2]], i8* [[TMP1]], align 1, !dbg [[DBG11]] +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG11]] // CHECK5-NEXT: ret void, !dbg [[DBG12:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] !dbg [[DBG13:![0-9]+]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] !dbg [[DBG13:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca double, align 8 @@ -2502,94 +2632,97 @@ // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[A:%.*]] = alloca double, align 8 -// CHECK5-NEXT: [[I5:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[I4:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8*, !dbg [[DBG14:![0-9]+]] -// CHECK5-NEXT: [[TMP0:%.*]] = load double, double* undef, align 8, !dbg [[DBG15:![0-9]+]] -// CHECK5-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP0]], !dbg [[DBG15]] +// CHECK5-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8, !dbg [[DBG14:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0, !dbg [[DBG14]] +// CHECK5-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1, !dbg [[DBG14]] +// CHECK5-NEXT: store i8 [[TMP2]], i8* [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG14]] +// CHECK5-NEXT: [[TMP3:%.*]] = load double, double* undef, align 8, !dbg [[DBG15:![0-9]+]] +// CHECK5-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP3]], !dbg [[DBG15]] // CHECK5-NEXT: store double [[ADD]], double* [[DOTCAPTURE_EXPR_1]], align 8, !dbg [[DBG15]] -// CHECK5-NEXT: [[TMP1:%.*]] = load double, double* [[DOTCAPTURE_EXPR_1]], align 8, !dbg [[DBG15]] -// CHECK5-NEXT: [[SUB:%.*]] = fsub double [[TMP1]], 1.000000e+00, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP4:%.*]] = load double, double* [[DOTCAPTURE_EXPR_1]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[SUB:%.*]] = fsub double [[TMP4]], 1.000000e+00, !dbg [[DBG15]] // CHECK5-NEXT: [[DIV:%.*]] = fdiv double [[SUB]], 1.000000e+00, !dbg [[DBG15]] -// CHECK5-NEXT: [[CONV3:%.*]] = fptoui double [[DIV]] to i64, !dbg [[DBG15]] -// CHECK5-NEXT: [[SUB4:%.*]] = sub i64 [[CONV3]], 1, !dbg [[DBG15]] -// CHECK5-NEXT: store i64 [[SUB4]], i64* [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[CONV:%.*]] = fptoui double [[DIV]] to i64, !dbg [[DBG15]] +// CHECK5-NEXT: [[SUB3:%.*]] = sub i64 [[CONV]], 1, !dbg [[DBG15]] +// CHECK5-NEXT: store i64 [[SUB3]], i64* [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG15]] // CHECK5-NEXT: store i64 1, i64* [[I]], align 8, !dbg [[DBG15]] -// CHECK5-NEXT: [[TMP2:%.*]] = load double, double* [[DOTCAPTURE_EXPR_1]], align 8, !dbg [[DBG15]] -// CHECK5-NEXT: [[CMP:%.*]] = fcmp olt double 1.000000e+00, [[TMP2]], !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP5:%.*]] = load double, double* [[DOTCAPTURE_EXPR_1]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[CMP:%.*]] = fcmp olt double 1.000000e+00, [[TMP5]], !dbg [[DBG15]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]], !dbg [[DBG14]] // CHECK5: omp.precond.then: // CHECK5-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8, !dbg [[DBG15]] -// CHECK5-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG15]] -// CHECK5-NEXT: store i64 [[TMP3]], i64* [[DOTOMP_UB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: store i64 [[TMP6]], i64* [[DOTOMP_UB]], align 8, !dbg [[DBG15]] // CHECK5-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8, !dbg [[DBG15]] // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG15]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1, !dbg [[DBG14]] -// CHECK5-NEXT: [[CONV6:%.*]] = sext i8 [[TMP4]] to i64, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4, !dbg [[DBG14]] -// CHECK5-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 33, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 [[CONV6]]), !dbg [[DBG14]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG14]] +// CHECK5-NEXT: [[CONV5:%.*]] = sext i8 [[TMP7]] to i64, !dbg [[DBG14]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG14]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4, !dbg [[DBG14]] +// CHECK5-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 33, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 [[CONV5]]), !dbg [[DBG14]] // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG14]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG15]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG15]] -// CHECK5-NEXT: [[CMP7:%.*]] = icmp ugt i64 [[TMP7]], [[TMP8]], !dbg [[DBG15]] -// CHECK5-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[CMP6:%.*]] = icmp ugt i64 [[TMP10]], [[TMP11]], !dbg [[DBG15]] +// CHECK5-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG15]] // CHECK5: cond.true: -// CHECK5-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG15]] // CHECK5-NEXT: br label [[COND_END:%.*]], !dbg [[DBG15]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG15]] // CHECK5-NEXT: br label [[COND_END]], !dbg [[DBG15]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i64 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ], !dbg [[DBG15]] +// CHECK5-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ], !dbg [[DBG15]] // CHECK5-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8, !dbg [[DBG15]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8, !dbg [[DBG15]] -// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_IV]], align 8, !dbg [[DBG15]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG15]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG15]] -// CHECK5-NEXT: [[ADD8:%.*]] = add i64 [[TMP13]], 1, !dbg [[DBG15]] -// CHECK5-NEXT: [[CMP9:%.*]] = icmp ult i64 [[TMP12]], [[ADD8]], !dbg [[DBG15]] -// CHECK5-NEXT: br i1 [[CMP9]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG14]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: store i64 [[TMP14]], i64* [[DOTOMP_IV]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[ADD7:%.*]] = add i64 [[TMP16]], 1, !dbg [[DBG15]] +// CHECK5-NEXT: [[CMP8:%.*]] = icmp ult i64 [[TMP15]], [[ADD7]], !dbg [[DBG15]] +// CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG14]] // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG14]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG15]] -// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG15]] -// CHECK5-NEXT: [[ADD10:%.*]] = add i64 [[TMP15]], 1, !dbg [[DBG15]] -// CHECK5-NEXT: [[CMP11:%.*]] = icmp ult i64 [[TMP14]], [[ADD10]], !dbg [[DBG15]] -// CHECK5-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG14]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[ADD9:%.*]] = add i64 [[TMP18]], 1, !dbg [[DBG15]] +// CHECK5-NEXT: [[CMP10:%.*]] = icmp ult i64 [[TMP17]], [[ADD9]], !dbg [[DBG15]] +// CHECK5-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG14]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG15]] -// CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 1, !dbg [[DBG15]] -// CHECK5-NEXT: [[ADD12:%.*]] = add i64 1, [[MUL]], !dbg [[DBG15]] -// CHECK5-NEXT: store i64 [[ADD12]], i64* [[I5]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP19]], 1, !dbg [[DBG15]] +// CHECK5-NEXT: [[ADD11:%.*]] = add i64 1, [[MUL]], !dbg [[DBG15]] +// CHECK5-NEXT: store i64 [[ADD11]], i64* [[I4]], align 8, !dbg [[DBG15]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG16:![0-9]+]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG14]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG15]] -// CHECK5-NEXT: [[ADD13:%.*]] = add i64 [[TMP17]], 1, !dbg [[DBG15]] -// CHECK5-NEXT: store i64 [[ADD13]], i64* [[DOTOMP_IV]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[ADD12:%.*]] = add i64 [[TMP20]], 1, !dbg [[DBG15]] +// CHECK5-NEXT: store i64 [[ADD12]], i64* [[DOTOMP_IV]], align 8, !dbg [[DBG15]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG14]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG14]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8, !dbg [[DBG15]] -// CHECK5-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8, !dbg [[DBG15]] -// CHECK5-NEXT: [[ADD14:%.*]] = add i64 [[TMP18]], [[TMP19]], !dbg [[DBG15]] -// CHECK5-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_LB]], align 8, !dbg [[DBG15]] -// CHECK5-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG15]] -// CHECK5-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8, !dbg [[DBG15]] -// CHECK5-NEXT: [[ADD15:%.*]] = add i64 [[TMP20]], [[TMP21]], !dbg [[DBG15]] -// CHECK5-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_UB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[ADD13:%.*]] = add i64 [[TMP21]], [[TMP22]], !dbg [[DBG15]] +// CHECK5-NEXT: store i64 [[ADD13]], i64* [[DOTOMP_LB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[ADD14:%.*]] = add i64 [[TMP23]], [[TMP24]], !dbg [[DBG15]] +// CHECK5-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_UB]], align 8, !dbg [[DBG15]] // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG14]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4, !dbg [[DBG14]] -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP23]]), !dbg [[DBG14]] +// CHECK5-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG14]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4, !dbg [[DBG14]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP26]]), !dbg [[DBG14]] // CHECK5-NEXT: br label [[OMP_PRECOND_END]], !dbg [[DBG14]] // CHECK5: omp.precond.end: // CHECK5-NEXT: ret void, !dbg [[DBG16]] @@ -2602,23 +2735,29 @@ // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK5-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK5-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK5-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB9:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]), !dbg [[DBG22:![0-9]+]] +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG22:![0-9]+]] +// CHECK5-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8, !dbg [[DBG22]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG22]] +// CHECK5-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8, !dbg [[DBG22]] +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG22]] +// CHECK5-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8, !dbg [[DBG22]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG22]] +// CHECK5-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8, !dbg [[DBG22]] +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB9:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG22]] // CHECK5-NEXT: ret void, !dbg [[DBG23:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] !dbg [[DBG24:![0-9]+]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] !dbg [[DBG24:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2628,79 +2767,81 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK5-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK5-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK5-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8, !dbg [[DBG25:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8, !dbg [[DBG25]] -// CHECK5-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8, !dbg [[DBG25]] -// CHECK5-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8, !dbg [[DBG25]] +// CHECK5-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8, !dbg [[DBG25:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0, !dbg [[DBG25]] +// CHECK5-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8, !dbg [[DBG25]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1, !dbg [[DBG25]] +// CHECK5-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8, !dbg [[DBG25]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2, !dbg [[DBG25]] +// CHECK5-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8, !dbg [[DBG25]] +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3, !dbg [[DBG25]] +// CHECK5-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8, !dbg [[DBG25]] // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG26:![0-9]+]] // CHECK5-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG26]] // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG26]] // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG26]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG25]] -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4, !dbg [[DBG25]] -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB6:[0-9]+]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG25]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG26]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423, !dbg [[DBG26]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG25]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4, !dbg [[DBG25]] +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB6:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG25]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423, !dbg [[DBG26]] // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG26]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]], !dbg [[DBG26]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG26]] // CHECK5-NEXT: br label [[COND_END]], !dbg [[DBG26]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ], !dbg [[DBG26]] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ], !dbg [[DBG26]] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG26]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG26]] -// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG26]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG25]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG26]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG26]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]], !dbg [[DBG26]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]], !dbg [[DBG26]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG25]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG26]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7, !dbg [[DBG26]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7, !dbg [[DBG26]] // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]], !dbg [[DBG26]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !dbg [[DBG26]] -// CHECK5-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 8, !dbg [[DBG27:![0-9]+]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG27]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG27]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP12]], i64 [[IDXPROM]], !dbg [[DBG27]] -// CHECK5-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg [[DBG27]] -// CHECK5-NEXT: [[TMP15:%.*]] = load float*, float** [[TMP2]], align 8, !dbg [[DBG27]] -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG27]] -// CHECK5-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64, !dbg [[DBG27]] -// CHECK5-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP15]], i64 [[IDXPROM2]], !dbg [[DBG27]] -// CHECK5-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !dbg [[DBG27]] -// CHECK5-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]], !dbg [[DBG27]] -// CHECK5-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP3]], align 8, !dbg [[DBG27]] -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG27]] -// CHECK5-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG27]] -// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM5]], !dbg [[DBG27]] -// CHECK5-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !dbg [[DBG27]] -// CHECK5-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]], !dbg [[DBG27]] -// CHECK5-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 8, !dbg [[DBG27]] -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG27]] -// CHECK5-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG27]] -// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM8]], !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP4]], align 8, !dbg [[DBG27:![0-9]+]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG27]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64, !dbg [[DBG27]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM]], !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP6]], align 8, !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG27]] +// CHECK5-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64, !dbg [[DBG27]] +// CHECK5-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM2]], !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !dbg [[DBG27]] +// CHECK5-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]], !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP8]], align 8, !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG27]] +// CHECK5-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64, !dbg [[DBG27]] +// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM5]], !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !dbg [[DBG27]] +// CHECK5-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]], !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP2]], align 8, !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG27]] +// CHECK5-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64, !dbg [[DBG27]] +// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP26]], i64 [[IDXPROM8]], !dbg [[DBG27]] // CHECK5-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4, !dbg [[DBG27]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG28:![0-9]+]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG25]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG26]] -// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], 1, !dbg [[DBG26]] +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP28]], 1, !dbg [[DBG26]] // CHECK5-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG26]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG25]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG25]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB8:[0-9]+]], i32 [[TMP5]]), !dbg [[DBG25]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB8:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG25]] // CHECK5-NEXT: ret void, !dbg [[DBG28]] // // @@ -2711,23 +2852,29 @@ // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK5-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK5-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK5-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK5-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB14:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..2 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]), !dbg [[DBG31:![0-9]+]] +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG31:![0-9]+]] +// CHECK5-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8, !dbg [[DBG31]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG31]] +// CHECK5-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8, !dbg [[DBG31]] +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG31]] +// CHECK5-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8, !dbg [[DBG31]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG31]] +// CHECK5-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8, !dbg [[DBG31]] +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB14:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG31]] // CHECK5-NEXT: ret void, !dbg [[DBG32:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] !dbg [[DBG33:![0-9]+]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] !dbg [[DBG33:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2737,79 +2884,81 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK5-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK5-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK5-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8, !dbg [[DBG34:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8, !dbg [[DBG34]] -// CHECK5-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8, !dbg [[DBG34]] -// CHECK5-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8, !dbg [[DBG34]] +// CHECK5-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8, !dbg [[DBG34:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0, !dbg [[DBG34]] +// CHECK5-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8, !dbg [[DBG34]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1, !dbg [[DBG34]] +// CHECK5-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8, !dbg [[DBG34]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2, !dbg [[DBG34]] +// CHECK5-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8, !dbg [[DBG34]] +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3, !dbg [[DBG34]] +// CHECK5-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8, !dbg [[DBG34]] // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG35:![0-9]+]] // CHECK5-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG35]] // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG35]] // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG35]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG34]] -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4, !dbg [[DBG34]] -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB11:[0-9]+]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG34]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG35]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423, !dbg [[DBG35]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG34]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4, !dbg [[DBG34]] +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB11:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG34]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423, !dbg [[DBG35]] // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG35]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]], !dbg [[DBG35]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG35]] // CHECK5-NEXT: br label [[COND_END]], !dbg [[DBG35]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ], !dbg [[DBG35]] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ], !dbg [[DBG35]] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG35]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG35]] -// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG35]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG34]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG35]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG35]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]], !dbg [[DBG35]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]], !dbg [[DBG35]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG34]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG35]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7, !dbg [[DBG35]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7, !dbg [[DBG35]] // CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]], !dbg [[DBG35]] // CHECK5-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !dbg [[DBG35]] -// CHECK5-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 8, !dbg [[DBG36:![0-9]+]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG36]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG36]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP12]], i64 [[IDXPROM]], !dbg [[DBG36]] -// CHECK5-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg [[DBG36]] -// CHECK5-NEXT: [[TMP15:%.*]] = load float*, float** [[TMP2]], align 8, !dbg [[DBG36]] -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG36]] -// CHECK5-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64, !dbg [[DBG36]] -// CHECK5-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP15]], i64 [[IDXPROM2]], !dbg [[DBG36]] -// CHECK5-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !dbg [[DBG36]] -// CHECK5-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]], !dbg [[DBG36]] -// CHECK5-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP3]], align 8, !dbg [[DBG36]] -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG36]] -// CHECK5-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG36]] -// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM5]], !dbg [[DBG36]] -// CHECK5-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !dbg [[DBG36]] -// CHECK5-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]], !dbg [[DBG36]] -// CHECK5-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 8, !dbg [[DBG36]] -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG36]] -// CHECK5-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG36]] -// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM8]], !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP4]], align 8, !dbg [[DBG36:![0-9]+]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG36]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64, !dbg [[DBG36]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM]], !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP6]], align 8, !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG36]] +// CHECK5-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64, !dbg [[DBG36]] +// CHECK5-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM2]], !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !dbg [[DBG36]] +// CHECK5-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]], !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP8]], align 8, !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG36]] +// CHECK5-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64, !dbg [[DBG36]] +// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM5]], !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !dbg [[DBG36]] +// CHECK5-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]], !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP2]], align 8, !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG36]] +// CHECK5-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64, !dbg [[DBG36]] +// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP26]], i64 [[IDXPROM8]], !dbg [[DBG36]] // CHECK5-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4, !dbg [[DBG36]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG37:![0-9]+]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG34]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG35]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1, !dbg [[DBG35]] +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1, !dbg [[DBG35]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG35]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG34]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG34]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB13:[0-9]+]], i32 [[TMP5]]), !dbg [[DBG34]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB13:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG34]] // CHECK5-NEXT: ret void, !dbg [[DBG37]] // // @@ -2820,23 +2969,29 @@ // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK5-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK5-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK5-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK5-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB19:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]), !dbg [[DBG40:![0-9]+]] +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG40:![0-9]+]] +// CHECK5-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8, !dbg [[DBG40]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG40]] +// CHECK5-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8, !dbg [[DBG40]] +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG40]] +// CHECK5-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8, !dbg [[DBG40]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG40]] +// CHECK5-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8, !dbg [[DBG40]] +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB19:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG40]] // CHECK5-NEXT: ret void, !dbg [[DBG41:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] !dbg [[DBG42:![0-9]+]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] !dbg [[DBG42:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2846,96 +3001,98 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK5-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK5-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK5-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8, !dbg [[DBG43:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8, !dbg [[DBG43]] -// CHECK5-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8, !dbg [[DBG43]] -// CHECK5-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8, !dbg [[DBG43]] +// CHECK5-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8, !dbg [[DBG43:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0, !dbg [[DBG43]] +// CHECK5-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8, !dbg [[DBG43]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1, !dbg [[DBG43]] +// CHECK5-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8, !dbg [[DBG43]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2, !dbg [[DBG43]] +// CHECK5-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8, !dbg [[DBG43]] +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3, !dbg [[DBG43]] +// CHECK5-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8, !dbg [[DBG43]] // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG44:![0-9]+]] // CHECK5-NEXT: store i32 16908288, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG44]] // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG44]] // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG43]] -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4, !dbg [[DBG43]] -// CHECK5-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB16:[0-9]+]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5), !dbg [[DBG43]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG43]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4, !dbg [[DBG43]] +// CHECK5-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB16:[0-9]+]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5), !dbg [[DBG43]] // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG43]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288, !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288, !dbg [[DBG44]] // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG44]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]], !dbg [[DBG44]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG44]] // CHECK5-NEXT: br label [[COND_END]], !dbg [[DBG44]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ], !dbg [[DBG44]] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ], !dbg [[DBG44]] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]], !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]], !dbg [[DBG44]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG43]] // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG43]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]], !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]], !dbg [[DBG44]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG43]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127, !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127, !dbg [[DBG44]] // CHECK5-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]], !dbg [[DBG44]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !dbg [[DBG45:![0-9]+]] -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG45]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64, !dbg [[DBG45]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM]], !dbg [[DBG45]] -// CHECK5-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg [[DBG45]] -// CHECK5-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !dbg [[DBG45]] -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG45]] -// CHECK5-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64, !dbg [[DBG45]] -// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM3]], !dbg [[DBG45]] -// CHECK5-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !dbg [[DBG45]] -// CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]], !dbg [[DBG45]] -// CHECK5-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !dbg [[DBG45]] -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG45]] -// CHECK5-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64, !dbg [[DBG45]] -// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM6]], !dbg [[DBG45]] -// CHECK5-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !dbg [[DBG45]] -// CHECK5-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]], !dbg [[DBG45]] -// CHECK5-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, !dbg [[DBG45]] -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG45]] -// CHECK5-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64, !dbg [[DBG45]] -// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM9]], !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP4]], align 8, !dbg [[DBG45:![0-9]+]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG45]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP20]] to i64, !dbg [[DBG45]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[IDXPROM]], !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP6]], align 8, !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG45]] +// CHECK5-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP23]] to i64, !dbg [[DBG45]] +// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[IDXPROM3]], !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP24:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !dbg [[DBG45]] +// CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[TMP21]], [[TMP24]], !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP25:%.*]] = load float*, float** [[TMP8]], align 8, !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG45]] +// CHECK5-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP26]] to i64, !dbg [[DBG45]] +// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP25]], i64 [[IDXPROM6]], !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP27:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !dbg [[DBG45]] +// CHECK5-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP27]], !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP28:%.*]] = load float*, float** [[TMP2]], align 8, !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG45]] +// CHECK5-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP29]] to i64, !dbg [[DBG45]] +// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP28]], i64 [[IDXPROM9]], !dbg [[DBG45]] // CHECK5-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4, !dbg [[DBG45]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG46:![0-9]+]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG43]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1, !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[ADD11:%.*]] = add i32 [[TMP30]], 1, !dbg [[DBG44]] // CHECK5-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG44]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG43]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG43]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[TMP27]], !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[ADD12:%.*]] = add i32 [[TMP31]], [[TMP32]], !dbg [[DBG44]] // CHECK5-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_LB]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[ADD13:%.*]] = add i32 [[TMP28]], [[TMP29]], !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[ADD13:%.*]] = add i32 [[TMP33]], [[TMP34]], !dbg [[DBG44]] // CHECK5-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG44]] // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG43]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB18:[0-9]+]], i32 [[TMP5]]), !dbg [[DBG43]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB18:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG43]] // CHECK5-NEXT: ret void, !dbg [[DBG46]] // // @@ -2946,23 +3103,29 @@ // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK5-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK5-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK5-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK5-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB21:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..4 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]), !dbg [[DBG50:![0-9]+]] +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG50:![0-9]+]] +// CHECK5-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8, !dbg [[DBG50]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG50]] +// CHECK5-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8, !dbg [[DBG50]] +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG50]] +// CHECK5-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8, !dbg [[DBG50]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG50]] +// CHECK5-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8, !dbg [[DBG50]] +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB21:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG50]] // CHECK5-NEXT: ret void, !dbg [[DBG51:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] !dbg [[DBG52:![0-9]+]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] !dbg [[DBG52:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -2972,65 +3135,67 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK5-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK5-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK5-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8, !dbg [[DBG53:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8, !dbg [[DBG53]] -// CHECK5-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8, !dbg [[DBG53]] -// CHECK5-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8, !dbg [[DBG53]] +// CHECK5-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8, !dbg [[DBG53:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0, !dbg [[DBG53]] +// CHECK5-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8, !dbg [[DBG53]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1, !dbg [[DBG53]] +// CHECK5-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8, !dbg [[DBG53]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2, !dbg [[DBG53]] +// CHECK5-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8, !dbg [[DBG53]] +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3, !dbg [[DBG53]] +// CHECK5-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8, !dbg [[DBG53]] // CHECK5-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8, !dbg [[DBG54:![0-9]+]] // CHECK5-NEXT: store i64 16908287, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG54]] // CHECK5-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8, !dbg [[DBG54]] // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG54]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG53]] -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4, !dbg [[DBG53]] -// CHECK5-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB21]], i32 [[TMP5]], i32 1073741859, i64 0, i64 16908287, i64 1, i64 1), !dbg [[DBG53]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG53]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4, !dbg [[DBG53]] +// CHECK5-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB21]], i32 [[TMP10]], i32 1073741859, i64 0, i64 16908287, i64 1, i64 1), !dbg [[DBG53]] // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG53]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB21]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]), !dbg [[DBG53]] -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0, !dbg [[DBG53]] +// CHECK5-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB21]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]), !dbg [[DBG53]] +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0, !dbg [[DBG53]] // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG53]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8, !dbg [[DBG54]] -// CHECK5-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_IV]], align 8, !dbg [[DBG54]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8, !dbg [[DBG54]] +// CHECK5-NEXT: store i64 [[TMP12]], i64* [[DOTOMP_IV]], align 8, !dbg [[DBG54]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG53]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG54]], !llvm.access.group !55 -// CHECK5-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG54]], !llvm.access.group !55 -// CHECK5-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1, !dbg [[DBG54]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]], !dbg [[DBG54]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG54]], !llvm.access.group !55 +// CHECK5-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG54]], !llvm.access.group !55 +// CHECK5-NEXT: [[ADD:%.*]] = add i64 [[TMP14]], 1, !dbg [[DBG54]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP13]], [[ADD]], !dbg [[DBG54]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG53]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG54]], !llvm.access.group !55 -// CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127, !dbg [[DBG54]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG54]], !llvm.access.group !55 +// CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP15]], 127, !dbg [[DBG54]] // CHECK5-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]], !dbg [[DBG54]] // CHECK5-NEXT: store i64 [[ADD1]], i64* [[I]], align 8, !dbg [[DBG54]], !llvm.access.group !55 -// CHECK5-NEXT: [[TMP11:%.*]] = load float*, float** [[TMP1]], align 8, !dbg [[DBG56:![0-9]+]], !llvm.access.group !55 -// CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8, !dbg [[DBG56]], !llvm.access.group !55 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[TMP12]], !dbg [[DBG56]] -// CHECK5-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg [[DBG56]], !llvm.access.group !55 -// CHECK5-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP2]], align 8, !dbg [[DBG56]], !llvm.access.group !55 -// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[I]], align 8, !dbg [[DBG56]], !llvm.access.group !55 -// CHECK5-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[TMP15]], !dbg [[DBG56]] -// CHECK5-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !dbg [[DBG56]], !llvm.access.group !55 -// CHECK5-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]], !dbg [[DBG56]] -// CHECK5-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP3]], align 8, !dbg [[DBG56]], !llvm.access.group !55 -// CHECK5-NEXT: [[TMP18:%.*]] = load i64, i64* [[I]], align 8, !dbg [[DBG56]], !llvm.access.group !55 -// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[TMP18]], !dbg [[DBG56]] -// CHECK5-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !dbg [[DBG56]], !llvm.access.group !55 -// CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]], !dbg [[DBG56]] -// CHECK5-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP0]], align 8, !dbg [[DBG56]], !llvm.access.group !55 -// CHECK5-NEXT: [[TMP21:%.*]] = load i64, i64* [[I]], align 8, !dbg [[DBG56]], !llvm.access.group !55 -// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[TMP21]], !dbg [[DBG56]] +// CHECK5-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP4]], align 8, !dbg [[DBG56:![0-9]+]], !llvm.access.group !55 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[I]], align 8, !dbg [[DBG56]], !llvm.access.group !55 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[TMP17]], !dbg [[DBG56]] +// CHECK5-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg [[DBG56]], !llvm.access.group !55 +// CHECK5-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP6]], align 8, !dbg [[DBG56]], !llvm.access.group !55 +// CHECK5-NEXT: [[TMP20:%.*]] = load i64, i64* [[I]], align 8, !dbg [[DBG56]], !llvm.access.group !55 +// CHECK5-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[TMP20]], !dbg [[DBG56]] +// CHECK5-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !dbg [[DBG56]], !llvm.access.group !55 +// CHECK5-NEXT: [[MUL3:%.*]] = fmul float [[TMP18]], [[TMP21]], !dbg [[DBG56]] +// CHECK5-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP8]], align 8, !dbg [[DBG56]], !llvm.access.group !55 +// CHECK5-NEXT: [[TMP23:%.*]] = load i64, i64* [[I]], align 8, !dbg [[DBG56]], !llvm.access.group !55 +// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[TMP23]], !dbg [[DBG56]] +// CHECK5-NEXT: [[TMP24:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !dbg [[DBG56]], !llvm.access.group !55 +// CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP24]], !dbg [[DBG56]] +// CHECK5-NEXT: [[TMP25:%.*]] = load float*, float** [[TMP2]], align 8, !dbg [[DBG56]], !llvm.access.group !55 +// CHECK5-NEXT: [[TMP26:%.*]] = load i64, i64* [[I]], align 8, !dbg [[DBG56]], !llvm.access.group !55 +// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP25]], i64 [[TMP26]], !dbg [[DBG56]] // CHECK5-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !dbg [[DBG56]], !llvm.access.group !55 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG57:![0-9]+]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG53]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG54]], !llvm.access.group !55 -// CHECK5-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1, !dbg [[DBG54]] +// CHECK5-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG54]], !llvm.access.group !55 +// CHECK5-NEXT: [[ADD7:%.*]] = add i64 [[TMP27]], 1, !dbg [[DBG54]] // CHECK5-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !dbg [[DBG54]], !llvm.access.group !55 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG53]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -3048,23 +3213,29 @@ // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK5-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK5-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK5-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB23:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..5 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]), !dbg [[DBG62:![0-9]+]] +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG62:![0-9]+]] +// CHECK5-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8, !dbg [[DBG62]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG62]] +// CHECK5-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8, !dbg [[DBG62]] +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG62]] +// CHECK5-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8, !dbg [[DBG62]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG62]] +// CHECK5-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8, !dbg [[DBG62]] +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB23:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG62]] // CHECK5-NEXT: ret void, !dbg [[DBG63:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] !dbg [[DBG64:![0-9]+]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] !dbg [[DBG64:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -3074,65 +3245,67 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK5-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK5-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK5-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8, !dbg [[DBG65:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8, !dbg [[DBG65]] -// CHECK5-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8, !dbg [[DBG65]] -// CHECK5-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8, !dbg [[DBG65]] +// CHECK5-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8, !dbg [[DBG65:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0, !dbg [[DBG65]] +// CHECK5-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8, !dbg [[DBG65]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1, !dbg [[DBG65]] +// CHECK5-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8, !dbg [[DBG65]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2, !dbg [[DBG65]] +// CHECK5-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8, !dbg [[DBG65]] +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3, !dbg [[DBG65]] +// CHECK5-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8, !dbg [[DBG65]] // CHECK5-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8, !dbg [[DBG66:![0-9]+]] // CHECK5-NEXT: store i64 16908287, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG66]] // CHECK5-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8, !dbg [[DBG66]] // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG66]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG65]] -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4, !dbg [[DBG65]] -// CHECK5-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB23]], i32 [[TMP5]], i32 1073741860, i64 0, i64 16908287, i64 1, i64 7), !dbg [[DBG65]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG65]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4, !dbg [[DBG65]] +// CHECK5-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB23]], i32 [[TMP10]], i32 1073741860, i64 0, i64 16908287, i64 1, i64 7), !dbg [[DBG65]] // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG65]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB23]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]), !dbg [[DBG65]] -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0, !dbg [[DBG65]] +// CHECK5-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB23]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]), !dbg [[DBG65]] +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0, !dbg [[DBG65]] // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG65]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8, !dbg [[DBG66]] -// CHECK5-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_IV]], align 8, !dbg [[DBG66]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8, !dbg [[DBG66]] +// CHECK5-NEXT: store i64 [[TMP12]], i64* [[DOTOMP_IV]], align 8, !dbg [[DBG66]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG65]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG66]], !llvm.access.group !67 -// CHECK5-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG66]], !llvm.access.group !67 -// CHECK5-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1, !dbg [[DBG66]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]], !dbg [[DBG66]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG66]], !llvm.access.group !67 +// CHECK5-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG66]], !llvm.access.group !67 +// CHECK5-NEXT: [[ADD:%.*]] = add i64 [[TMP14]], 1, !dbg [[DBG66]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP13]], [[ADD]], !dbg [[DBG66]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG65]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG66]], !llvm.access.group !67 -// CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127, !dbg [[DBG66]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG66]], !llvm.access.group !67 +// CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP15]], 127, !dbg [[DBG66]] // CHECK5-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]], !dbg [[DBG66]] // CHECK5-NEXT: store i64 [[ADD1]], i64* [[I]], align 8, !dbg [[DBG66]], !llvm.access.group !67 -// CHECK5-NEXT: [[TMP11:%.*]] = load float*, float** [[TMP1]], align 8, !dbg [[DBG68:![0-9]+]], !llvm.access.group !67 -// CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8, !dbg [[DBG68]], !llvm.access.group !67 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[TMP12]], !dbg [[DBG68]] -// CHECK5-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg [[DBG68]], !llvm.access.group !67 -// CHECK5-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP2]], align 8, !dbg [[DBG68]], !llvm.access.group !67 -// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[I]], align 8, !dbg [[DBG68]], !llvm.access.group !67 -// CHECK5-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[TMP15]], !dbg [[DBG68]] -// CHECK5-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !dbg [[DBG68]], !llvm.access.group !67 -// CHECK5-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]], !dbg [[DBG68]] -// CHECK5-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP3]], align 8, !dbg [[DBG68]], !llvm.access.group !67 -// CHECK5-NEXT: [[TMP18:%.*]] = load i64, i64* [[I]], align 8, !dbg [[DBG68]], !llvm.access.group !67 -// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[TMP18]], !dbg [[DBG68]] -// CHECK5-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !dbg [[DBG68]], !llvm.access.group !67 -// CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]], !dbg [[DBG68]] -// CHECK5-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP0]], align 8, !dbg [[DBG68]], !llvm.access.group !67 -// CHECK5-NEXT: [[TMP21:%.*]] = load i64, i64* [[I]], align 8, !dbg [[DBG68]], !llvm.access.group !67 -// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[TMP21]], !dbg [[DBG68]] +// CHECK5-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP4]], align 8, !dbg [[DBG68:![0-9]+]], !llvm.access.group !67 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[I]], align 8, !dbg [[DBG68]], !llvm.access.group !67 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[TMP17]], !dbg [[DBG68]] +// CHECK5-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg [[DBG68]], !llvm.access.group !67 +// CHECK5-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP6]], align 8, !dbg [[DBG68]], !llvm.access.group !67 +// CHECK5-NEXT: [[TMP20:%.*]] = load i64, i64* [[I]], align 8, !dbg [[DBG68]], !llvm.access.group !67 +// CHECK5-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[TMP20]], !dbg [[DBG68]] +// CHECK5-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !dbg [[DBG68]], !llvm.access.group !67 +// CHECK5-NEXT: [[MUL3:%.*]] = fmul float [[TMP18]], [[TMP21]], !dbg [[DBG68]] +// CHECK5-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP8]], align 8, !dbg [[DBG68]], !llvm.access.group !67 +// CHECK5-NEXT: [[TMP23:%.*]] = load i64, i64* [[I]], align 8, !dbg [[DBG68]], !llvm.access.group !67 +// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[TMP23]], !dbg [[DBG68]] +// CHECK5-NEXT: [[TMP24:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !dbg [[DBG68]], !llvm.access.group !67 +// CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP24]], !dbg [[DBG68]] +// CHECK5-NEXT: [[TMP25:%.*]] = load float*, float** [[TMP2]], align 8, !dbg [[DBG68]], !llvm.access.group !67 +// CHECK5-NEXT: [[TMP26:%.*]] = load i64, i64* [[I]], align 8, !dbg [[DBG68]], !llvm.access.group !67 +// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP25]], i64 [[TMP26]], !dbg [[DBG68]] // CHECK5-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !dbg [[DBG68]], !llvm.access.group !67 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG69:![0-9]+]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG65]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG66]], !llvm.access.group !67 -// CHECK5-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1, !dbg [[DBG66]] +// CHECK5-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG66]], !llvm.access.group !67 +// CHECK5-NEXT: [[ADD7:%.*]] = add i64 [[TMP27]], 1, !dbg [[DBG66]] // CHECK5-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !dbg [[DBG66]], !llvm.access.group !67 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG65]], !llvm.loop [[LOOP70:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -3152,26 +3325,33 @@ // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 // CHECK5-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[Y:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK5-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK5-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK5-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK5-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[X]], align 4, !dbg [[DBG74:![0-9]+]] // CHECK5-NEXT: store i32 0, i32* [[Y]], align 4, !dbg [[DBG75:![0-9]+]] -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB25:[0-9]+]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[Y]], float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]), !dbg [[DBG76:![0-9]+]] +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG76:![0-9]+]] +// CHECK5-NEXT: store i32* [[Y]], i32** [[TMP0]], align 8, !dbg [[DBG76]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG76]] +// CHECK5-NEXT: store float** [[A_ADDR]], float*** [[TMP1]], align 8, !dbg [[DBG76]] +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG76]] +// CHECK5-NEXT: store float** [[B_ADDR]], float*** [[TMP2]], align 8, !dbg [[DBG76]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG76]] +// CHECK5-NEXT: store float** [[C_ADDR]], float*** [[TMP3]], align 8, !dbg [[DBG76]] +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4, !dbg [[DBG76]] +// CHECK5-NEXT: store float** [[D_ADDR]], float*** [[TMP4]], align 8, !dbg [[DBG76]] +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB25:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG76]] // CHECK5-NEXT: ret void, !dbg [[DBG77:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[Y:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] !dbg [[DBG78:![0-9]+]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] !dbg [[DBG78:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -3187,21 +3367,23 @@ // CHECK5-NEXT: [[X8:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i32* [[Y]], i32** [[Y_ADDR]], align 8 -// CHECK5-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK5-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK5-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK5-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[Y_ADDR]], align 8, !dbg [[DBG79:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load float**, float*** [[A_ADDR]], align 8, !dbg [[DBG79]] -// CHECK5-NEXT: [[TMP2:%.*]] = load float**, float*** [[B_ADDR]], align 8, !dbg [[DBG79]] -// CHECK5-NEXT: [[TMP3:%.*]] = load float**, float*** [[C_ADDR]], align 8, !dbg [[DBG79]] -// CHECK5-NEXT: [[TMP4:%.*]] = load float**, float*** [[D_ADDR]], align 8, !dbg [[DBG79]] -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4, !dbg [[DBG80:![0-9]+]] -// CHECK5-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8, !dbg [[DBG80]] +// CHECK5-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8, !dbg [[DBG79:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 3, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 4, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP10:%.*]] = load float**, float*** [[TMP9]], align 8, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4, !dbg [[DBG80:![0-9]+]] +// CHECK5-NEXT: [[CONV:%.*]] = trunc i32 [[TMP11]] to i8, !dbg [[DBG80]] // CHECK5-NEXT: store i8 [[CONV]], i8* [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG80]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG80]] -// CHECK5-NEXT: [[CONV3:%.*]] = sext i8 [[TMP6]] to i32, !dbg [[DBG80]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG80]] +// CHECK5-NEXT: [[CONV3:%.*]] = sext i8 [[TMP12]] to i32, !dbg [[DBG80]] // CHECK5-NEXT: [[SUB:%.*]] = sub i32 57, [[CONV3]], !dbg [[DBG80]] // CHECK5-NEXT: [[ADD:%.*]] = add i32 [[SUB]], 1, !dbg [[DBG80]] // CHECK5-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1, !dbg [[DBG80]] @@ -3209,85 +3391,85 @@ // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV4]], 11, !dbg [[DBG81:![0-9]+]] // CHECK5-NEXT: [[SUB5:%.*]] = sub nsw i64 [[MUL]], 1, !dbg [[DBG81]] // CHECK5-NEXT: store i64 [[SUB5]], i64* [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG80]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG80]] -// CHECK5-NEXT: store i8 [[TMP7]], i8* [[I]], align 1, !dbg [[DBG80]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG80]] +// CHECK5-NEXT: store i8 [[TMP13]], i8* [[I]], align 1, !dbg [[DBG80]] // CHECK5-NEXT: store i32 11, i32* [[X]], align 4, !dbg [[DBG81]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG80]] -// CHECK5-NEXT: [[CONV6:%.*]] = sext i8 [[TMP8]] to i32, !dbg [[DBG80]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG80]] +// CHECK5-NEXT: [[CONV6:%.*]] = sext i8 [[TMP14]] to i32, !dbg [[DBG80]] // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[CONV6]], 57, !dbg [[DBG80]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]], !dbg [[DBG79]] // CHECK5: omp.precond.then: // CHECK5-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8, !dbg [[DBG80]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG81]] -// CHECK5-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_UB]], align 8, !dbg [[DBG80]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG81]] +// CHECK5-NEXT: store i64 [[TMP15]], i64* [[DOTOMP_UB]], align 8, !dbg [[DBG80]] // CHECK5-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8, !dbg [[DBG80]] // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG80]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG81]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG79]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4, !dbg [[DBG79]] -// CHECK5-NEXT: call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB25]], i32 [[TMP12]], i32 1073741862, i64 0, i64 [[TMP10]], i64 1, i64 1), !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG81]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4, !dbg [[DBG79]] +// CHECK5-NEXT: call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB25]], i32 [[TMP18]], i32 1073741862, i64 0, i64 [[TMP16]], i64 1, i64 1), !dbg [[DBG79]] // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG79]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG79]] -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4, !dbg [[DBG79]] -// CHECK5-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB25]], i32 [[TMP14]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]), !dbg [[DBG79]] -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB25]], i32 [[TMP20]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]), !dbg [[DBG79]] +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP21]], 0, !dbg [[DBG79]] // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG79]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8, !dbg [[DBG80]] -// CHECK5-NEXT: store i64 [[TMP16]], i64* [[DOTOMP_IV]], align 8, !dbg [[DBG80]] +// CHECK5-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8, !dbg [[DBG80]] +// CHECK5-NEXT: store i64 [[TMP22]], i64* [[DOTOMP_IV]], align 8, !dbg [[DBG80]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG79]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group !82 -// CHECK5-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG80]], !llvm.access.group !82 -// CHECK5-NEXT: [[CMP9:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]], !dbg [[DBG80]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group !82 +// CHECK5-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG80]], !llvm.access.group !82 +// CHECK5-NEXT: [[CMP9:%.*]] = icmp sle i64 [[TMP23]], [[TMP24]], !dbg [[DBG80]] // CHECK5-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG79]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP19:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG80]], !llvm.access.group !82 -// CHECK5-NEXT: [[CONV10:%.*]] = sext i8 [[TMP19]] to i64, !dbg [[DBG80]] -// CHECK5-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group !82 -// CHECK5-NEXT: [[DIV11:%.*]] = sdiv i64 [[TMP20]], 11, !dbg [[DBG80]] +// CHECK5-NEXT: [[TMP25:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG80]], !llvm.access.group !82 +// CHECK5-NEXT: [[CONV10:%.*]] = sext i8 [[TMP25]] to i64, !dbg [[DBG80]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group !82 +// CHECK5-NEXT: [[DIV11:%.*]] = sdiv i64 [[TMP26]], 11, !dbg [[DBG80]] // CHECK5-NEXT: [[MUL12:%.*]] = mul nsw i64 [[DIV11]], 1, !dbg [[DBG80]] // CHECK5-NEXT: [[ADD13:%.*]] = add nsw i64 [[CONV10]], [[MUL12]], !dbg [[DBG80]] // CHECK5-NEXT: [[CONV14:%.*]] = trunc i64 [[ADD13]] to i8, !dbg [[DBG80]] // CHECK5-NEXT: store i8 [[CONV14]], i8* [[I7]], align 1, !dbg [[DBG80]], !llvm.access.group !82 -// CHECK5-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group !82 -// CHECK5-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group !82 -// CHECK5-NEXT: [[DIV15:%.*]] = sdiv i64 [[TMP22]], 11, !dbg [[DBG80]] +// CHECK5-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group !82 +// CHECK5-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group !82 +// CHECK5-NEXT: [[DIV15:%.*]] = sdiv i64 [[TMP28]], 11, !dbg [[DBG80]] // CHECK5-NEXT: [[MUL16:%.*]] = mul nsw i64 [[DIV15]], 11, !dbg [[DBG80]] -// CHECK5-NEXT: [[SUB17:%.*]] = sub nsw i64 [[TMP21]], [[MUL16]], !dbg [[DBG80]] +// CHECK5-NEXT: [[SUB17:%.*]] = sub nsw i64 [[TMP27]], [[MUL16]], !dbg [[DBG80]] // CHECK5-NEXT: [[MUL18:%.*]] = mul nsw i64 [[SUB17]], 1, !dbg [[DBG81]] // CHECK5-NEXT: [[SUB19:%.*]] = sub nsw i64 11, [[MUL18]], !dbg [[DBG81]] // CHECK5-NEXT: [[CONV20:%.*]] = trunc i64 [[SUB19]] to i32, !dbg [[DBG81]] // CHECK5-NEXT: store i32 [[CONV20]], i32* [[X8]], align 4, !dbg [[DBG81]], !llvm.access.group !82 -// CHECK5-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP2]], align 8, !dbg [[DBG83:![0-9]+]], !llvm.access.group !82 -// CHECK5-NEXT: [[TMP24:%.*]] = load i8, i8* [[I7]], align 1, !dbg [[DBG83]], !llvm.access.group !82 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i8 [[TMP24]] to i64, !dbg [[DBG83]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM]], !dbg [[DBG83]] -// CHECK5-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg [[DBG83]], !llvm.access.group !82 -// CHECK5-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP3]], align 8, !dbg [[DBG83]], !llvm.access.group !82 -// CHECK5-NEXT: [[TMP27:%.*]] = load i8, i8* [[I7]], align 1, !dbg [[DBG83]], !llvm.access.group !82 -// CHECK5-NEXT: [[IDXPROM21:%.*]] = sext i8 [[TMP27]] to i64, !dbg [[DBG83]] -// CHECK5-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds float, float* [[TMP26]], i64 [[IDXPROM21]], !dbg [[DBG83]] -// CHECK5-NEXT: [[TMP28:%.*]] = load float, float* [[ARRAYIDX22]], align 4, !dbg [[DBG83]], !llvm.access.group !82 -// CHECK5-NEXT: [[MUL23:%.*]] = fmul float [[TMP25]], [[TMP28]], !dbg [[DBG83]] -// CHECK5-NEXT: [[TMP29:%.*]] = load float*, float** [[TMP4]], align 8, !dbg [[DBG83]], !llvm.access.group !82 +// CHECK5-NEXT: [[TMP29:%.*]] = load float*, float** [[TMP6]], align 8, !dbg [[DBG83:![0-9]+]], !llvm.access.group !82 // CHECK5-NEXT: [[TMP30:%.*]] = load i8, i8* [[I7]], align 1, !dbg [[DBG83]], !llvm.access.group !82 -// CHECK5-NEXT: [[IDXPROM24:%.*]] = sext i8 [[TMP30]] to i64, !dbg [[DBG83]] -// CHECK5-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds float, float* [[TMP29]], i64 [[IDXPROM24]], !dbg [[DBG83]] -// CHECK5-NEXT: [[TMP31:%.*]] = load float, float* [[ARRAYIDX25]], align 4, !dbg [[DBG83]], !llvm.access.group !82 -// CHECK5-NEXT: [[MUL26:%.*]] = fmul float [[MUL23]], [[TMP31]], !dbg [[DBG83]] -// CHECK5-NEXT: [[TMP32:%.*]] = load float*, float** [[TMP1]], align 8, !dbg [[DBG83]], !llvm.access.group !82 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i8 [[TMP30]] to i64, !dbg [[DBG83]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP29]], i64 [[IDXPROM]], !dbg [[DBG83]] +// CHECK5-NEXT: [[TMP31:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg [[DBG83]], !llvm.access.group !82 +// CHECK5-NEXT: [[TMP32:%.*]] = load float*, float** [[TMP8]], align 8, !dbg [[DBG83]], !llvm.access.group !82 // CHECK5-NEXT: [[TMP33:%.*]] = load i8, i8* [[I7]], align 1, !dbg [[DBG83]], !llvm.access.group !82 -// CHECK5-NEXT: [[IDXPROM27:%.*]] = sext i8 [[TMP33]] to i64, !dbg [[DBG83]] -// CHECK5-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds float, float* [[TMP32]], i64 [[IDXPROM27]], !dbg [[DBG83]] +// CHECK5-NEXT: [[IDXPROM21:%.*]] = sext i8 [[TMP33]] to i64, !dbg [[DBG83]] +// CHECK5-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds float, float* [[TMP32]], i64 [[IDXPROM21]], !dbg [[DBG83]] +// CHECK5-NEXT: [[TMP34:%.*]] = load float, float* [[ARRAYIDX22]], align 4, !dbg [[DBG83]], !llvm.access.group !82 +// CHECK5-NEXT: [[MUL23:%.*]] = fmul float [[TMP31]], [[TMP34]], !dbg [[DBG83]] +// CHECK5-NEXT: [[TMP35:%.*]] = load float*, float** [[TMP10]], align 8, !dbg [[DBG83]], !llvm.access.group !82 +// CHECK5-NEXT: [[TMP36:%.*]] = load i8, i8* [[I7]], align 1, !dbg [[DBG83]], !llvm.access.group !82 +// CHECK5-NEXT: [[IDXPROM24:%.*]] = sext i8 [[TMP36]] to i64, !dbg [[DBG83]] +// CHECK5-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds float, float* [[TMP35]], i64 [[IDXPROM24]], !dbg [[DBG83]] +// CHECK5-NEXT: [[TMP37:%.*]] = load float, float* [[ARRAYIDX25]], align 4, !dbg [[DBG83]], !llvm.access.group !82 +// CHECK5-NEXT: [[MUL26:%.*]] = fmul float [[MUL23]], [[TMP37]], !dbg [[DBG83]] +// CHECK5-NEXT: [[TMP38:%.*]] = load float*, float** [[TMP4]], align 8, !dbg [[DBG83]], !llvm.access.group !82 +// CHECK5-NEXT: [[TMP39:%.*]] = load i8, i8* [[I7]], align 1, !dbg [[DBG83]], !llvm.access.group !82 +// CHECK5-NEXT: [[IDXPROM27:%.*]] = sext i8 [[TMP39]] to i64, !dbg [[DBG83]] +// CHECK5-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds float, float* [[TMP38]], i64 [[IDXPROM27]], !dbg [[DBG83]] // CHECK5-NEXT: store float [[MUL26]], float* [[ARRAYIDX28]], align 4, !dbg [[DBG83]], !llvm.access.group !82 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG84:![0-9]+]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG79]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group !82 -// CHECK5-NEXT: [[ADD29:%.*]] = add nsw i64 [[TMP34]], 1, !dbg [[DBG80]] +// CHECK5-NEXT: [[TMP40:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group !82 +// CHECK5-NEXT: [[ADD29:%.*]] = add nsw i64 [[TMP40]], 1, !dbg [[DBG80]] // CHECK5-NEXT: store i64 [[ADD29]], i64* [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group !82 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG79]], !llvm.loop [[LOOP85:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -3308,24 +3490,30 @@ // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 // CHECK5-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK5-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK5-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK5-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK5-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[X]], align 4, !dbg [[DBG89:![0-9]+]] -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB27:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..7 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]), !dbg [[DBG90:![0-9]+]] +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG90:![0-9]+]] +// CHECK5-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8, !dbg [[DBG90]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG90]] +// CHECK5-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8, !dbg [[DBG90]] +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG90]] +// CHECK5-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8, !dbg [[DBG90]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG90]] +// CHECK5-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8, !dbg [[DBG90]] +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB27:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG90]] // CHECK5-NEXT: ret void, !dbg [[DBG91:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] !dbg [[DBG92:![0-9]+]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] !dbg [[DBG92:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -3337,78 +3525,80 @@ // CHECK5-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK5-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK5-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK5-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8, !dbg [[DBG93:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8, !dbg [[DBG93]] -// CHECK5-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8, !dbg [[DBG93]] -// CHECK5-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8, !dbg [[DBG93]] +// CHECK5-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8, !dbg [[DBG93:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0, !dbg [[DBG93]] +// CHECK5-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8, !dbg [[DBG93]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1, !dbg [[DBG93]] +// CHECK5-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8, !dbg [[DBG93]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2, !dbg [[DBG93]] +// CHECK5-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8, !dbg [[DBG93]] +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 3, !dbg [[DBG93]] +// CHECK5-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8, !dbg [[DBG93]] // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG94:![0-9]+]] // CHECK5-NEXT: store i32 199, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG94]] // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG94]] // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG94]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG93]] -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4, !dbg [[DBG93]] -// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB27]], i32 [[TMP5]], i32 1073741861, i32 0, i32 199, i32 1, i32 1), !dbg [[DBG93]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG93]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4, !dbg [[DBG93]] +// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB27]], i32 [[TMP10]], i32 1073741861, i32 0, i32 199, i32 1, i32 1), !dbg [[DBG93]] // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG93]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB27]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]), !dbg [[DBG93]] -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0, !dbg [[DBG93]] +// CHECK5-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB27]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]), !dbg [[DBG93]] +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0, !dbg [[DBG93]] // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG93]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG94]] -// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG94]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG94]] +// CHECK5-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG94]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG93]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group !95 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG94]], !llvm.access.group !95 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]], !dbg [[DBG94]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group !95 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG94]], !llvm.access.group !95 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]], !dbg [[DBG94]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG93]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group !95 -// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 20, !dbg [[DBG94]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group !95 +// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP15]], 20, !dbg [[DBG94]] // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1, !dbg [[DBG94]] // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 48, [[MUL]], !dbg [[DBG94]] // CHECK5-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i8, !dbg [[DBG94]] // CHECK5-NEXT: store i8 [[CONV]], i8* [[I]], align 1, !dbg [[DBG94]], !llvm.access.group !95 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group !95 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group !95 -// CHECK5-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP12]], 20, !dbg [[DBG94]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group !95 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group !95 +// CHECK5-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP17]], 20, !dbg [[DBG94]] // CHECK5-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 20, !dbg [[DBG94]] -// CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL3]], !dbg [[DBG94]] +// CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], [[MUL3]], !dbg [[DBG94]] // CHECK5-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1, !dbg [[DBG96:![0-9]+]] // CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 -10, [[MUL4]], !dbg [[DBG96]] // CHECK5-NEXT: store i32 [[ADD5]], i32* [[X]], align 4, !dbg [[DBG96]], !llvm.access.group !95 -// CHECK5-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 8, !dbg [[DBG97:![0-9]+]], !llvm.access.group !95 -// CHECK5-NEXT: [[TMP14:%.*]] = load i8, i8* [[I]], align 1, !dbg [[DBG97]], !llvm.access.group !95 -// CHECK5-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP14]] to i64, !dbg [[DBG97]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 [[IDXPROM]], !dbg [[DBG97]] -// CHECK5-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg [[DBG97]], !llvm.access.group !95 -// CHECK5-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8, !dbg [[DBG97]], !llvm.access.group !95 -// CHECK5-NEXT: [[TMP17:%.*]] = load i8, i8* [[I]], align 1, !dbg [[DBG97]], !llvm.access.group !95 -// CHECK5-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP17]] to i64, !dbg [[DBG97]] -// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[IDXPROM6]], !dbg [[DBG97]] -// CHECK5-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !dbg [[DBG97]], !llvm.access.group !95 -// CHECK5-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP18]], !dbg [[DBG97]] -// CHECK5-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 8, !dbg [[DBG97]], !llvm.access.group !95 -// CHECK5-NEXT: [[TMP20:%.*]] = load i8, i8* [[I]], align 1, !dbg [[DBG97]], !llvm.access.group !95 -// CHECK5-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP20]] to i64, !dbg [[DBG97]] -// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[IDXPROM9]], !dbg [[DBG97]] -// CHECK5-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !dbg [[DBG97]], !llvm.access.group !95 -// CHECK5-NEXT: [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP21]], !dbg [[DBG97]] -// CHECK5-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 8, !dbg [[DBG97]], !llvm.access.group !95 -// CHECK5-NEXT: [[TMP23:%.*]] = load i8, i8* [[I]], align 1, !dbg [[DBG97]], !llvm.access.group !95 -// CHECK5-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP23]] to i64, !dbg [[DBG97]] -// CHECK5-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[IDXPROM12]], !dbg [[DBG97]] +// CHECK5-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP4]], align 8, !dbg [[DBG97:![0-9]+]], !llvm.access.group !95 +// CHECK5-NEXT: [[TMP19:%.*]] = load i8, i8* [[I]], align 1, !dbg [[DBG97]], !llvm.access.group !95 +// CHECK5-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP19]] to i64, !dbg [[DBG97]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM]], !dbg [[DBG97]] +// CHECK5-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg [[DBG97]], !llvm.access.group !95 +// CHECK5-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP6]], align 8, !dbg [[DBG97]], !llvm.access.group !95 +// CHECK5-NEXT: [[TMP22:%.*]] = load i8, i8* [[I]], align 1, !dbg [[DBG97]], !llvm.access.group !95 +// CHECK5-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP22]] to i64, !dbg [[DBG97]] +// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM6]], !dbg [[DBG97]] +// CHECK5-NEXT: [[TMP23:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !dbg [[DBG97]], !llvm.access.group !95 +// CHECK5-NEXT: [[MUL8:%.*]] = fmul float [[TMP20]], [[TMP23]], !dbg [[DBG97]] +// CHECK5-NEXT: [[TMP24:%.*]] = load float*, float** [[TMP8]], align 8, !dbg [[DBG97]], !llvm.access.group !95 +// CHECK5-NEXT: [[TMP25:%.*]] = load i8, i8* [[I]], align 1, !dbg [[DBG97]], !llvm.access.group !95 +// CHECK5-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP25]] to i64, !dbg [[DBG97]] +// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP24]], i64 [[IDXPROM9]], !dbg [[DBG97]] +// CHECK5-NEXT: [[TMP26:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !dbg [[DBG97]], !llvm.access.group !95 +// CHECK5-NEXT: [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP26]], !dbg [[DBG97]] +// CHECK5-NEXT: [[TMP27:%.*]] = load float*, float** [[TMP2]], align 8, !dbg [[DBG97]], !llvm.access.group !95 +// CHECK5-NEXT: [[TMP28:%.*]] = load i8, i8* [[I]], align 1, !dbg [[DBG97]], !llvm.access.group !95 +// CHECK5-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP28]] to i64, !dbg [[DBG97]] +// CHECK5-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[TMP27]], i64 [[IDXPROM12]], !dbg [[DBG97]] // CHECK5-NEXT: store float [[MUL11]], float* [[ARRAYIDX13]], align 4, !dbg [[DBG97]], !llvm.access.group !95 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG98:![0-9]+]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG93]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group !95 -// CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP24]], 1, !dbg [[DBG94]] +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group !95 +// CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP29]], 1, !dbg [[DBG94]] // CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group !95 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG93]], !llvm.loop [[LOOP99:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -3433,7 +3623,7 @@ // CHECK5-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 // CHECK5-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK5-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK5-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4, !dbg [[DBG105:![0-9]+]] @@ -3442,24 +3632,26 @@ // CHECK5-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8, !dbg [[DBG105]] // CHECK5-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 16, !dbg [[DBG105]] // CHECK5-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8, !dbg [[DBG105]] -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4, !dbg [[DBG106:![0-9]+]] -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[N_CASTED]] to i32*, !dbg [[DBG106]] -// CHECK5-NEXT: store i32 [[TMP3]], i32* [[CONV]], align 4, !dbg [[DBG106]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8, !dbg [[DBG106]] -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB32:[0-9]+]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, i64, i64)* @.omp_outlined..8 to void (i32*, i32*, ...)*), float** [[A_ADDR]], i64 [[TMP1]], i64 [[TMP4]]), !dbg [[DBG106]] -// CHECK5-NEXT: [[TMP5:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8, !dbg [[DBG107:![0-9]+]] -// CHECK5-NEXT: call void @llvm.stackrestore(i8* [[TMP5]]), !dbg [[DBG107]] -// CHECK5-NEXT: ret void, !dbg [[DBG107]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG106:![0-9]+]] +// CHECK5-NEXT: store float** [[A_ADDR]], float*** [[TMP3]], align 8, !dbg [[DBG106]] +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG106]] +// CHECK5-NEXT: store i64 [[TMP1]], i64* [[TMP4]], align 8, !dbg [[DBG106]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG106]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4, !dbg [[DBG107:![0-9]+]] +// CHECK5-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 8, !dbg [[DBG106]] +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB32:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG106]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8, !dbg [[DBG108:![0-9]+]] +// CHECK5-NEXT: call void @llvm.stackrestore(i8* [[TMP7]]), !dbg [[DBG108]] +// CHECK5-NEXT: ret void, !dbg [[DBG108]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[N:%.*]]) #[[ATTR1]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG108:![0-9]+]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG109:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK5-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 +// CHECK5-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3471,108 +3663,111 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8, !dbg [[DBG109:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG109]] -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32*, !dbg [[DBG109]] -// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG110:![0-9]+]] -// CHECK5-NEXT: store i32 16908288, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave(), !dbg [[DBG109]] -// CHECK5-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8, !dbg [[DBG109]] -// CHECK5-NEXT: [[VLA1:%.*]] = alloca float, i64 [[TMP1]], align 16, !dbg [[DBG109]] -// CHECK5-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8, !dbg [[DBG109]] -// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG109]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4, !dbg [[DBG109]] -// CHECK5-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB29:[0-9]+]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5), !dbg [[DBG109]] -// CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG109]] +// CHECK5-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8, !dbg [[DBG110:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0, !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8, !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1, !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8, !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2, !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8, !dbg [[DBG110]] +// CHECK5-NEXT: store i32 [[TMP6]], i32* [[N]], align 4, !dbg [[DBG110]] +// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG111:![0-9]+]] +// CHECK5-NEXT: store i32 16908288, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP7:%.*]] = call i8* @llvm.stacksave(), !dbg [[DBG110]] +// CHECK5-NEXT: store i8* [[TMP7]], i8** [[SAVED_STACK]], align 8, !dbg [[DBG110]] +// CHECK5-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP4]], align 16, !dbg [[DBG110]] +// CHECK5-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR0]], align 8, !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4, !dbg [[DBG110]] +// CHECK5-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB29:[0-9]+]], i32 [[TMP9]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5), !dbg [[DBG110]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG110]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], 16908288, !dbg [[DBG110]] -// CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP10]], 16908288, !dbg [[DBG111]] +// CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG111]] // CHECK5: cond.true: -// CHECK5-NEXT: br label [[COND_END:%.*]], !dbg [[DBG110]] +// CHECK5-NEXT: br label [[COND_END:%.*]], !dbg [[DBG111]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: br label [[COND_END]], !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: br label [[COND_END]], !dbg [[DBG111]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ], !dbg [[DBG110]] -// CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP8]], [[TMP9]], !dbg [[DBG110]] -// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]], !dbg [[DBG109]] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ], !dbg [[DBG111]] +// CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP13]], [[TMP14]], !dbg [[DBG111]] +// CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]], !dbg [[DBG110]] // CHECK5: omp.dispatch.cleanup: -// CHECK5-NEXT: br label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG109]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG110]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG109]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG110]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[CMP3:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]], !dbg [[DBG110]] -// CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]], !dbg [[DBG109]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP15]], [[TMP16]], !dbg [[DBG111]] +// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]], !dbg [[DBG110]] // CHECK5: omp.inner.for.cond.cleanup: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG109]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG110]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP12]], 127, !dbg [[DBG110]] -// CHECK5-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]], !dbg [[DBG110]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP17]], 127, !dbg [[DBG111]] +// CHECK5-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]], !dbg [[DBG111]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !dbg [[DBG111]] // CHECK5-NEXT: [[CALL:%.*]] = invoke noundef i32 @_Z3foov() -// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG111:![0-9]+]] +// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG112:![0-9]+]] // CHECK5: invoke.cont: -// CHECK5-NEXT: [[CONV4:%.*]] = sitofp i32 [[CALL]] to float, !dbg [[DBG111]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG111]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP13]] to i64, !dbg [[DBG111]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[VLA1]], i64 [[IDXPROM]], !dbg [[DBG111]] -// CHECK5-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg [[DBG111]] -// CHECK5-NEXT: [[ADD5:%.*]] = fadd float [[CONV4]], [[TMP14]], !dbg [[DBG111]] -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4, !dbg [[DBG111]] -// CHECK5-NEXT: [[CONV6:%.*]] = sitofp i32 [[TMP15]] to float, !dbg [[DBG111]] -// CHECK5-NEXT: [[ADD7:%.*]] = fadd float [[ADD5]], [[CONV6]], !dbg [[DBG111]] -// CHECK5-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP0]], align 8, !dbg [[DBG111]] -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG111]] -// CHECK5-NEXT: [[IDXPROM8:%.*]] = zext i32 [[TMP17]] to i64, !dbg [[DBG111]] -// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[IDXPROM8]], !dbg [[DBG111]] -// CHECK5-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX9]], align 4, !dbg [[DBG111]] -// CHECK5-NEXT: [[ADD10:%.*]] = fadd float [[TMP18]], [[ADD7]], !dbg [[DBG111]] -// CHECK5-NEXT: store float [[ADD10]], float* [[ARRAYIDX9]], align 4, !dbg [[DBG111]] -// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG111]] +// CHECK5-NEXT: [[CONV:%.*]] = sitofp i32 [[CALL]] to float, !dbg [[DBG112]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG112]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP18]] to i64, !dbg [[DBG112]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[VLA]], i64 [[IDXPROM]], !dbg [[DBG112]] +// CHECK5-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg [[DBG112]] +// CHECK5-NEXT: [[ADD3:%.*]] = fadd float [[CONV]], [[TMP19]], !dbg [[DBG112]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[N]], align 4, !dbg [[DBG112]] +// CHECK5-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP20]] to float, !dbg [[DBG112]] +// CHECK5-NEXT: [[ADD5:%.*]] = fadd float [[ADD3]], [[CONV4]], !dbg [[DBG112]] +// CHECK5-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP2]], align 8, !dbg [[DBG112]] +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG112]] +// CHECK5-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP22]] to i64, !dbg [[DBG112]] +// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM6]], !dbg [[DBG112]] +// CHECK5-NEXT: [[TMP23:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !dbg [[DBG112]] +// CHECK5-NEXT: [[ADD8:%.*]] = fadd float [[TMP23]], [[ADD5]], !dbg [[DBG112]] +// CHECK5-NEXT: store float [[ADD8]], float* [[ARRAYIDX7]], align 4, !dbg [[DBG112]] +// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG112]] // CHECK5: omp.body.continue: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG109]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG110]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[ADD11:%.*]] = add i32 [[TMP19]], 1, !dbg [[DBG110]] -// CHECK5-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG109]], !llvm.loop [[LOOP112:![0-9]+]] +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[ADD9:%.*]] = add i32 [[TMP24]], 1, !dbg [[DBG111]] +// CHECK5-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG110]], !llvm.loop [[LOOP113:![0-9]+]] // CHECK5: omp.inner.for.end: -// CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG109]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG110]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[ADD12:%.*]] = add i32 [[TMP20]], [[TMP21]], !dbg [[DBG110]] -// CHECK5-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_LB]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[ADD13:%.*]] = add i32 [[TMP22]], [[TMP23]], !dbg [[DBG110]] -// CHECK5-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG109]], !llvm.loop [[LOOP113:![0-9]+]] +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[ADD10:%.*]] = add i32 [[TMP25]], [[TMP26]], !dbg [[DBG111]] +// CHECK5-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_LB]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[ADD11:%.*]] = add i32 [[TMP27]], [[TMP28]], !dbg [[DBG111]] +// CHECK5-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG110]], !llvm.loop [[LOOP114:![0-9]+]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB31:[0-9]+]], i32 [[TMP4]]), !dbg [[DBG109]] -// CHECK5-NEXT: [[TMP24:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8, !dbg [[DBG109]] -// CHECK5-NEXT: call void @llvm.stackrestore(i8* [[TMP24]]), !dbg [[DBG109]] -// CHECK5-NEXT: ret void, !dbg [[DBG111]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB31:[0-9]+]], i32 [[TMP9]]), !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP29:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8, !dbg [[DBG110]] +// CHECK5-NEXT: call void @llvm.stackrestore(i8* [[TMP29]]), !dbg [[DBG110]] +// CHECK5-NEXT: ret void, !dbg [[DBG112]] // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP25:%.*]] = landingpad { i8*, i32 } -// CHECK5-NEXT: catch i8* null, !dbg [[DBG111]] -// CHECK5-NEXT: [[TMP26:%.*]] = extractvalue { i8*, i32 } [[TMP25]], 0, !dbg [[DBG111]] -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP26]]) #[[ATTR7:[0-9]+]], !dbg [[DBG111]] -// CHECK5-NEXT: unreachable, !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP30:%.*]] = landingpad { i8*, i32 } +// CHECK5-NEXT: catch i8* null, !dbg [[DBG112]] +// CHECK5-NEXT: [[TMP31:%.*]] = extractvalue { i8*, i32 } [[TMP30]], 0, !dbg [[DBG112]] +// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP31]]) #[[ATTR7:[0-9]+]], !dbg [[DBG112]] +// CHECK5-NEXT: unreachable, !dbg [[DBG112]] // // // CHECK5-LABEL: define {{[^@]+}}@__clang_call_terminate @@ -3587,25 +3782,25 @@ // CHECK6-NEXT: entry: // CHECK6-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK6-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK6-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK6-NEXT: store double 5.000000e+00, double* [[A]], align 8 // CHECK6-NEXT: [[TMP0:%.*]] = load double, double* [[A]], align 8 // CHECK6-NEXT: [[CONV:%.*]] = fptosi double [[TMP0]] to i8 // CHECK6-NEXT: store i8 [[CONV]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK6-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* -// CHECK6-NEXT: store i8 [[TMP1]], i8* [[CONV1]], align 1 -// CHECK6-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP2]]) +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK6-NEXT: store i8 [[TMP2]], i8* [[TMP1]], align 1 +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK6-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK6-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK6-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK6-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca double, align 8 @@ -3616,94 +3811,97 @@ // CHECK6-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[A:%.*]] = alloca double, align 8 -// CHECK6-NEXT: [[I5:%.*]] = alloca i64, align 8 +// CHECK6-NEXT: [[I4:%.*]] = alloca i64, align 8 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK6-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK6-NEXT: [[TMP0:%.*]] = load double, double* undef, align 8 -// CHECK6-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP0]] +// CHECK6-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK6-NEXT: store i8 [[TMP2]], i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK6-NEXT: [[TMP3:%.*]] = load double, double* undef, align 8 +// CHECK6-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP3]] // CHECK6-NEXT: store double [[ADD]], double* [[DOTCAPTURE_EXPR_1]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load double, double* [[DOTCAPTURE_EXPR_1]], align 8 -// CHECK6-NEXT: [[SUB:%.*]] = fsub double [[TMP1]], 1.000000e+00 +// CHECK6-NEXT: [[TMP4:%.*]] = load double, double* [[DOTCAPTURE_EXPR_1]], align 8 +// CHECK6-NEXT: [[SUB:%.*]] = fsub double [[TMP4]], 1.000000e+00 // CHECK6-NEXT: [[DIV:%.*]] = fdiv double [[SUB]], 1.000000e+00 -// CHECK6-NEXT: [[CONV3:%.*]] = fptoui double [[DIV]] to i64 -// CHECK6-NEXT: [[SUB4:%.*]] = sub i64 [[CONV3]], 1 -// CHECK6-NEXT: store i64 [[SUB4]], i64* [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK6-NEXT: [[CONV:%.*]] = fptoui double [[DIV]] to i64 +// CHECK6-NEXT: [[SUB3:%.*]] = sub i64 [[CONV]], 1 +// CHECK6-NEXT: store i64 [[SUB3]], i64* [[DOTCAPTURE_EXPR_2]], align 8 // CHECK6-NEXT: store i64 1, i64* [[I]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load double, double* [[DOTCAPTURE_EXPR_1]], align 8 -// CHECK6-NEXT: [[CMP:%.*]] = fcmp olt double 1.000000e+00, [[TMP2]] +// CHECK6-NEXT: [[TMP5:%.*]] = load double, double* [[DOTCAPTURE_EXPR_1]], align 8 +// CHECK6-NEXT: [[CMP:%.*]] = fcmp olt double 1.000000e+00, [[TMP5]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK6: omp.precond.then: // CHECK6-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK6-NEXT: store i64 [[TMP3]], i64* [[DOTOMP_UB]], align 8 +// CHECK6-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK6-NEXT: store i64 [[TMP6]], i64* [[DOTOMP_UB]], align 8 // CHECK6-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK6-NEXT: [[CONV6:%.*]] = sext i8 [[TMP4]] to i64 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 33, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 [[CONV6]]) +// CHECK6-NEXT: [[TMP7:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK6-NEXT: [[CONV5:%.*]] = sext i8 [[TMP7]] to i64 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 33, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 [[CONV5]]) // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: -// CHECK6-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK6-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK6-NEXT: [[CMP7:%.*]] = icmp ugt i64 [[TMP7]], [[TMP8]] -// CHECK6-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK6-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK6-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK6-NEXT: [[CMP6:%.*]] = icmp ugt i64 [[TMP10]], [[TMP11]] +// CHECK6-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: -// CHECK6-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK6-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK6-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i64 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK6-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK6-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK6-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_IV]], align 8 -// CHECK6-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK6-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK6-NEXT: [[ADD8:%.*]] = add i64 [[TMP13]], 1 -// CHECK6-NEXT: [[CMP9:%.*]] = icmp ult i64 [[TMP12]], [[ADD8]] -// CHECK6-NEXT: br i1 [[CMP9]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK6-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK6-NEXT: store i64 [[TMP14]], i64* [[DOTOMP_IV]], align 8 +// CHECK6-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK6-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK6-NEXT: [[ADD7:%.*]] = add i64 [[TMP16]], 1 +// CHECK6-NEXT: [[CMP8:%.*]] = icmp ult i64 [[TMP15]], [[ADD7]] +// CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK6-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK6-NEXT: [[ADD10:%.*]] = add i64 [[TMP15]], 1 -// CHECK6-NEXT: [[CMP11:%.*]] = icmp ult i64 [[TMP14]], [[ADD10]] -// CHECK6-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK6-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK6-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK6-NEXT: [[ADD9:%.*]] = add i64 [[TMP18]], 1 +// CHECK6-NEXT: [[CMP10:%.*]] = icmp ult i64 [[TMP17]], [[ADD9]] +// CHECK6-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK6-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 1 -// CHECK6-NEXT: [[ADD12:%.*]] = add i64 1, [[MUL]] -// CHECK6-NEXT: store i64 [[ADD12]], i64* [[I5]], align 8 +// CHECK6-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK6-NEXT: [[MUL:%.*]] = mul i64 [[TMP19]], 1 +// CHECK6-NEXT: [[ADD11:%.*]] = add i64 1, [[MUL]] +// CHECK6-NEXT: store i64 [[ADD11]], i64* [[I4]], align 8 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK6-NEXT: [[ADD13:%.*]] = add i64 [[TMP17]], 1 -// CHECK6-NEXT: store i64 [[ADD13]], i64* [[DOTOMP_IV]], align 8 +// CHECK6-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK6-NEXT: [[ADD12:%.*]] = add i64 [[TMP20]], 1 +// CHECK6-NEXT: store i64 [[ADD12]], i64* [[DOTOMP_IV]], align 8 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: -// CHECK6-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK6-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK6-NEXT: [[ADD14:%.*]] = add i64 [[TMP18]], [[TMP19]] -// CHECK6-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_LB]], align 8 -// CHECK6-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK6-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK6-NEXT: [[ADD15:%.*]] = add i64 [[TMP20]], [[TMP21]] -// CHECK6-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_UB]], align 8 +// CHECK6-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK6-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK6-NEXT: [[ADD13:%.*]] = add i64 [[TMP21]], [[TMP22]] +// CHECK6-NEXT: store i64 [[ADD13]], i64* [[DOTOMP_LB]], align 8 +// CHECK6-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK6-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK6-NEXT: [[ADD14:%.*]] = add i64 [[TMP23]], [[TMP24]] +// CHECK6-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_UB]], align 8 // CHECK6-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK6: omp.dispatch.end: -// CHECK6-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK6-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) // CHECK6-NEXT: br label [[OMP_PRECOND_END]] // CHECK6: omp.precond.end: // CHECK6-NEXT: ret void @@ -3716,23 +3914,29 @@ // CHECK6-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK6-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK6-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK6-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK6-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK6-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK6-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK6-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK6-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK6-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK6-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3742,79 +3946,81 @@ // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK6-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK6-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK6-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK6-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 +// CHECK6-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK6-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK6-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 8 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP12]], i64 [[IDXPROM]] -// CHECK6-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK6-NEXT: [[TMP15:%.*]] = load float*, float** [[TMP2]], align 8 -// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK6-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP15]], i64 [[IDXPROM2]] -// CHECK6-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX3]], align 4 -// CHECK6-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK6-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP3]], align 8 -// CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK6-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM5]] -// CHECK6-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX6]], align 4 -// CHECK6-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK6-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 8 -// CHECK6-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK6-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM8]] +// CHECK6-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP4]], align 8 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM]] +// CHECK6-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK6-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP6]], align 8 +// CHECK6-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK6-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM2]] +// CHECK6-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK6-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK6-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP8]], align 8 +// CHECK6-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK6-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM5]] +// CHECK6-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX6]], align 4 +// CHECK6-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK6-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP2]], align 8 +// CHECK6-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK6-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP26]], i64 [[IDXPROM8]] // CHECK6-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK6-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK6-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK6-NEXT: ret void // // @@ -3825,23 +4031,29 @@ // CHECK6-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK6-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK6-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK6-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK6-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK6-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK6-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..2 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK6-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK6-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK6-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK6-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3851,79 +4063,81 @@ // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK6-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK6-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK6-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK6-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 +// CHECK6-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK6-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK6-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK6-NEXT: store i32 [[SUB]], i32* [[I]], align 4 -// CHECK6-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 8 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP12]], i64 [[IDXPROM]] -// CHECK6-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK6-NEXT: [[TMP15:%.*]] = load float*, float** [[TMP2]], align 8 -// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK6-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP15]], i64 [[IDXPROM2]] -// CHECK6-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX3]], align 4 -// CHECK6-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK6-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP3]], align 8 -// CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK6-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM5]] -// CHECK6-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX6]], align 4 -// CHECK6-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK6-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 8 -// CHECK6-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK6-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM8]] +// CHECK6-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP4]], align 8 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM]] +// CHECK6-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK6-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP6]], align 8 +// CHECK6-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK6-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM2]] +// CHECK6-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK6-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK6-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP8]], align 8 +// CHECK6-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK6-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM5]] +// CHECK6-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX6]], align 4 +// CHECK6-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK6-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP2]], align 8 +// CHECK6-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK6-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP26]], i64 [[IDXPROM8]] // CHECK6-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK6-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK6-NEXT: ret void // // @@ -3934,23 +4148,29 @@ // CHECK6-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK6-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK6-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK6-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK6-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK6-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK6-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK6-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK6-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK6-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK6-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3960,96 +4180,98 @@ // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK6-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK6-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK6-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK6-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 +// CHECK6-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK6-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 16908288, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK6-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK6-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK6-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK6-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8 -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM]] -// CHECK6-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK6-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8 -// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK6-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM3]] -// CHECK6-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4 -// CHECK6-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK6-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8 -// CHECK6-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK6-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM6]] -// CHECK6-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4 -// CHECK6-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK6-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8 -// CHECK6-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK6-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM9]] +// CHECK6-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP4]], align 8 +// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[IDXPROM]] +// CHECK6-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK6-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP6]], align 8 +// CHECK6-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK6-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[IDXPROM3]] +// CHECK6-NEXT: [[TMP24:%.*]] = load float, float* [[ARRAYIDX4]], align 4 +// CHECK6-NEXT: [[MUL5:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK6-NEXT: [[TMP25:%.*]] = load float*, float** [[TMP8]], align 8 +// CHECK6-NEXT: [[TMP26:%.*]] = load i32, i32* [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK6-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP25]], i64 [[IDXPROM6]] +// CHECK6-NEXT: [[TMP27:%.*]] = load float, float* [[ARRAYIDX7]], align 4 +// CHECK6-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP27]] +// CHECK6-NEXT: [[TMP28:%.*]] = load float*, float** [[TMP2]], align 8 +// CHECK6-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP29]] to i64 +// CHECK6-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP28]], i64 [[IDXPROM9]] // CHECK6-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 +// CHECK6-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[ADD11:%.*]] = add i32 [[TMP30]], 1 // CHECK6-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: -// CHECK6-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK6-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[ADD12:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK6-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD13:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK6-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[ADD13:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK6-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK6: omp.dispatch.end: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK6-NEXT: ret void // // @@ -4060,23 +4282,29 @@ // CHECK6-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK6-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK6-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK6-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK6-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK6-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK6-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..4 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK6-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK6-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK6-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK6-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK6-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -4086,65 +4314,67 @@ // CHECK6-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK6-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK6-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK6-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK6-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 +// CHECK6-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK6-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 // CHECK6-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK6-NEXT: store i64 16908287, i64* [[DOTOMP_UB]], align 8 // CHECK6-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK6-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 1073741859, i64 0, i64 16908287, i64 1, i64 1) +// CHECK6-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK6-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 1073741859, i64 0, i64 16908287, i64 1, i64 1) // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) -// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK6-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) +// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK6-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: -// CHECK6-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK6-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_IV]], align 8 +// CHECK6-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK6-NEXT: store i64 [[TMP12]], i64* [[DOTOMP_IV]], align 8 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK6-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !5 -// CHECK6-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1 -// CHECK6-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]] +// CHECK6-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK6-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !5 +// CHECK6-NEXT: [[ADD:%.*]] = add i64 [[TMP14]], 1 +// CHECK6-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP13]], [[ADD]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK6-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127 +// CHECK6-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK6-NEXT: [[MUL:%.*]] = mul i64 [[TMP15]], 127 // CHECK6-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]] // CHECK6-NEXT: store i64 [[ADD1]], i64* [[I]], align 8, !llvm.access.group !5 -// CHECK6-NEXT: [[TMP11:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !5 -// CHECK6-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[TMP12]] -// CHECK6-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !5 -// CHECK6-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !5 -// CHECK6-NEXT: [[TMP15:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 -// CHECK6-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[TMP15]] -// CHECK6-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !5 -// CHECK6-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]] -// CHECK6-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !5 -// CHECK6-NEXT: [[TMP18:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 -// CHECK6-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[TMP18]] -// CHECK6-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !5 -// CHECK6-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]] -// CHECK6-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !5 -// CHECK6-NEXT: [[TMP21:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 -// CHECK6-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[TMP21]] +// CHECK6-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP4]], align 8, !llvm.access.group !5 +// CHECK6-NEXT: [[TMP17:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[TMP17]] +// CHECK6-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !5 +// CHECK6-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP6]], align 8, !llvm.access.group !5 +// CHECK6-NEXT: [[TMP20:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 +// CHECK6-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[TMP20]] +// CHECK6-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !5 +// CHECK6-NEXT: [[MUL3:%.*]] = fmul float [[TMP18]], [[TMP21]] +// CHECK6-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP8]], align 8, !llvm.access.group !5 +// CHECK6-NEXT: [[TMP23:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 +// CHECK6-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[TMP23]] +// CHECK6-NEXT: [[TMP24:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !5 +// CHECK6-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP24]] +// CHECK6-NEXT: [[TMP25:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !5 +// CHECK6-NEXT: [[TMP26:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 +// CHECK6-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP25]], i64 [[TMP26]] // CHECK6-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !5 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK6-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1 +// CHECK6-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK6-NEXT: [[ADD7:%.*]] = add i64 [[TMP27]], 1 // CHECK6-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK6: omp.inner.for.end: @@ -4162,23 +4392,29 @@ // CHECK6-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 // CHECK6-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK6-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK6-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK6-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK6-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK6-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..5 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK6-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK6-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK6-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK6-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK6-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -4188,65 +4424,67 @@ // CHECK6-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK6-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK6-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK6-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK6-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 +// CHECK6-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK6-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 // CHECK6-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK6-NEXT: store i64 16908287, i64* [[DOTOMP_UB]], align 8 // CHECK6-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK6-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 1073741860, i64 0, i64 16908287, i64 1, i64 7) +// CHECK6-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK6-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 1073741860, i64 0, i64 16908287, i64 1, i64 7) // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) -// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK6-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) +// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK6-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: -// CHECK6-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK6-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_IV]], align 8 +// CHECK6-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK6-NEXT: store i64 [[TMP12]], i64* [[DOTOMP_IV]], align 8 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 -// CHECK6-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !8 -// CHECK6-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1 -// CHECK6-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]] +// CHECK6-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 +// CHECK6-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !8 +// CHECK6-NEXT: [[ADD:%.*]] = add i64 [[TMP14]], 1 +// CHECK6-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP13]], [[ADD]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 -// CHECK6-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127 +// CHECK6-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 +// CHECK6-NEXT: [[MUL:%.*]] = mul i64 [[TMP15]], 127 // CHECK6-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]] // CHECK6-NEXT: store i64 [[ADD1]], i64* [[I]], align 8, !llvm.access.group !8 -// CHECK6-NEXT: [[TMP11:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !8 -// CHECK6-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[TMP12]] -// CHECK6-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !8 -// CHECK6-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !8 -// CHECK6-NEXT: [[TMP15:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 -// CHECK6-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[TMP15]] -// CHECK6-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !8 -// CHECK6-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]] -// CHECK6-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !8 -// CHECK6-NEXT: [[TMP18:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 -// CHECK6-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[TMP18]] -// CHECK6-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !8 -// CHECK6-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]] -// CHECK6-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !8 -// CHECK6-NEXT: [[TMP21:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 -// CHECK6-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[TMP21]] +// CHECK6-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP4]], align 8, !llvm.access.group !8 +// CHECK6-NEXT: [[TMP17:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[TMP17]] +// CHECK6-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !8 +// CHECK6-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP6]], align 8, !llvm.access.group !8 +// CHECK6-NEXT: [[TMP20:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 +// CHECK6-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[TMP20]] +// CHECK6-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !8 +// CHECK6-NEXT: [[MUL3:%.*]] = fmul float [[TMP18]], [[TMP21]] +// CHECK6-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP8]], align 8, !llvm.access.group !8 +// CHECK6-NEXT: [[TMP23:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 +// CHECK6-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[TMP23]] +// CHECK6-NEXT: [[TMP24:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !8 +// CHECK6-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP24]] +// CHECK6-NEXT: [[TMP25:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !8 +// CHECK6-NEXT: [[TMP26:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 +// CHECK6-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP25]], i64 [[TMP26]] // CHECK6-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !8 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 -// CHECK6-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1 +// CHECK6-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 +// CHECK6-NEXT: [[ADD7:%.*]] = add i64 [[TMP27]], 1 // CHECK6-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK6: omp.inner.for.end: @@ -4266,26 +4504,33 @@ // CHECK6-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 // CHECK6-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[Y:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK6-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK6-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK6-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK6-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 // CHECK6-NEXT: store i32 0, i32* [[X]], align 4 // CHECK6-NEXT: store i32 0, i32* [[Y]], align 4 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[Y]], float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK6-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store i32* [[Y]], i32** [[TMP0]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store float** [[A_ADDR]], float*** [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store float** [[B_ADDR]], float*** [[TMP2]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK6-NEXT: store float** [[C_ADDR]], float*** [[TMP3]], align 8 +// CHECK6-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK6-NEXT: store float** [[D_ADDR]], float*** [[TMP4]], align 8 +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[Y:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK6-NEXT: [[Y_ADDR:%.*]] = alloca i32*, align 8 -// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK6-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK6-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK6-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK6-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -4301,21 +4546,23 @@ // CHECK6-NEXT: [[X8:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK6-NEXT: store i32* [[Y]], i32** [[Y_ADDR]], align 8 -// CHECK6-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK6-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK6-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK6-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load i32*, i32** [[Y_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK6-NEXT: [[TMP4:%.*]] = load float**, float*** [[D_ADDR]], align 8 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK6-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 +// CHECK6-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 3 +// CHECK6-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 +// CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load float**, float*** [[TMP9]], align 8 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK6-NEXT: [[CONV:%.*]] = trunc i32 [[TMP11]] to i8 // CHECK6-NEXT: store i8 [[CONV]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK6-NEXT: [[TMP6:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK6-NEXT: [[CONV3:%.*]] = sext i8 [[TMP6]] to i32 +// CHECK6-NEXT: [[TMP12:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK6-NEXT: [[CONV3:%.*]] = sext i8 [[TMP12]] to i32 // CHECK6-NEXT: [[SUB:%.*]] = sub i32 57, [[CONV3]] // CHECK6-NEXT: [[ADD:%.*]] = add i32 [[SUB]], 1 // CHECK6-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 @@ -4323,85 +4570,85 @@ // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV4]], 11 // CHECK6-NEXT: [[SUB5:%.*]] = sub nsw i64 [[MUL]], 1 // CHECK6-NEXT: store i64 [[SUB5]], i64* [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK6-NEXT: [[TMP7:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK6-NEXT: store i8 [[TMP7]], i8* [[I]], align 1 +// CHECK6-NEXT: [[TMP13:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK6-NEXT: store i8 [[TMP13]], i8* [[I]], align 1 // CHECK6-NEXT: store i32 11, i32* [[X]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK6-NEXT: [[CONV6:%.*]] = sext i8 [[TMP8]] to i32 +// CHECK6-NEXT: [[TMP14:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK6-NEXT: [[CONV6:%.*]] = sext i8 [[TMP14]] to i32 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[CONV6]], 57 // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK6: omp.precond.then: // CHECK6-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 -// CHECK6-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK6-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_UB]], align 8 +// CHECK6-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK6-NEXT: store i64 [[TMP15]], i64* [[DOTOMP_UB]], align 8 // CHECK6-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK6-NEXT: call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 1073741862, i64 0, i64 [[TMP10]], i64 1, i64 1) +// CHECK6-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK6-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK6-NEXT: call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], i32 1073741862, i64 0, i64 [[TMP16]], i64 1, i64 1) // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK6-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB2]], i32 [[TMP14]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) -// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK6-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK6-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]) +// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP21]], 0 // CHECK6-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: -// CHECK6-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK6-NEXT: store i64 [[TMP16]], i64* [[DOTOMP_IV]], align 8 +// CHECK6-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK6-NEXT: store i64 [[TMP22]], i64* [[DOTOMP_IV]], align 8 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 -// CHECK6-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !11 -// CHECK6-NEXT: [[CMP9:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] +// CHECK6-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 +// CHECK6-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !11 +// CHECK6-NEXT: [[CMP9:%.*]] = icmp sle i64 [[TMP23]], [[TMP24]] // CHECK6-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP19:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !11 -// CHECK6-NEXT: [[CONV10:%.*]] = sext i8 [[TMP19]] to i64 -// CHECK6-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 -// CHECK6-NEXT: [[DIV11:%.*]] = sdiv i64 [[TMP20]], 11 +// CHECK6-NEXT: [[TMP25:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !11 +// CHECK6-NEXT: [[CONV10:%.*]] = sext i8 [[TMP25]] to i64 +// CHECK6-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 +// CHECK6-NEXT: [[DIV11:%.*]] = sdiv i64 [[TMP26]], 11 // CHECK6-NEXT: [[MUL12:%.*]] = mul nsw i64 [[DIV11]], 1 // CHECK6-NEXT: [[ADD13:%.*]] = add nsw i64 [[CONV10]], [[MUL12]] // CHECK6-NEXT: [[CONV14:%.*]] = trunc i64 [[ADD13]] to i8 // CHECK6-NEXT: store i8 [[CONV14]], i8* [[I7]], align 1, !llvm.access.group !11 -// CHECK6-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 -// CHECK6-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 -// CHECK6-NEXT: [[DIV15:%.*]] = sdiv i64 [[TMP22]], 11 +// CHECK6-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 +// CHECK6-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 +// CHECK6-NEXT: [[DIV15:%.*]] = sdiv i64 [[TMP28]], 11 // CHECK6-NEXT: [[MUL16:%.*]] = mul nsw i64 [[DIV15]], 11 -// CHECK6-NEXT: [[SUB17:%.*]] = sub nsw i64 [[TMP21]], [[MUL16]] +// CHECK6-NEXT: [[SUB17:%.*]] = sub nsw i64 [[TMP27]], [[MUL16]] // CHECK6-NEXT: [[MUL18:%.*]] = mul nsw i64 [[SUB17]], 1 // CHECK6-NEXT: [[SUB19:%.*]] = sub nsw i64 11, [[MUL18]] // CHECK6-NEXT: [[CONV20:%.*]] = trunc i64 [[SUB19]] to i32 // CHECK6-NEXT: store i32 [[CONV20]], i32* [[X8]], align 4, !llvm.access.group !11 -// CHECK6-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !11 -// CHECK6-NEXT: [[TMP24:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 -// CHECK6-NEXT: [[IDXPROM:%.*]] = sext i8 [[TMP24]] to i64 -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM]] -// CHECK6-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !11 -// CHECK6-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !11 -// CHECK6-NEXT: [[TMP27:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 -// CHECK6-NEXT: [[IDXPROM21:%.*]] = sext i8 [[TMP27]] to i64 -// CHECK6-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds float, float* [[TMP26]], i64 [[IDXPROM21]] -// CHECK6-NEXT: [[TMP28:%.*]] = load float, float* [[ARRAYIDX22]], align 4, !llvm.access.group !11 -// CHECK6-NEXT: [[MUL23:%.*]] = fmul float [[TMP25]], [[TMP28]] -// CHECK6-NEXT: [[TMP29:%.*]] = load float*, float** [[TMP4]], align 8, !llvm.access.group !11 +// CHECK6-NEXT: [[TMP29:%.*]] = load float*, float** [[TMP6]], align 8, !llvm.access.group !11 // CHECK6-NEXT: [[TMP30:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 -// CHECK6-NEXT: [[IDXPROM24:%.*]] = sext i8 [[TMP30]] to i64 -// CHECK6-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds float, float* [[TMP29]], i64 [[IDXPROM24]] -// CHECK6-NEXT: [[TMP31:%.*]] = load float, float* [[ARRAYIDX25]], align 4, !llvm.access.group !11 -// CHECK6-NEXT: [[MUL26:%.*]] = fmul float [[MUL23]], [[TMP31]] -// CHECK6-NEXT: [[TMP32:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !11 +// CHECK6-NEXT: [[IDXPROM:%.*]] = sext i8 [[TMP30]] to i64 +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP29]], i64 [[IDXPROM]] +// CHECK6-NEXT: [[TMP31:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK6-NEXT: [[TMP32:%.*]] = load float*, float** [[TMP8]], align 8, !llvm.access.group !11 // CHECK6-NEXT: [[TMP33:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 -// CHECK6-NEXT: [[IDXPROM27:%.*]] = sext i8 [[TMP33]] to i64 -// CHECK6-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds float, float* [[TMP32]], i64 [[IDXPROM27]] +// CHECK6-NEXT: [[IDXPROM21:%.*]] = sext i8 [[TMP33]] to i64 +// CHECK6-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds float, float* [[TMP32]], i64 [[IDXPROM21]] +// CHECK6-NEXT: [[TMP34:%.*]] = load float, float* [[ARRAYIDX22]], align 4, !llvm.access.group !11 +// CHECK6-NEXT: [[MUL23:%.*]] = fmul float [[TMP31]], [[TMP34]] +// CHECK6-NEXT: [[TMP35:%.*]] = load float*, float** [[TMP10]], align 8, !llvm.access.group !11 +// CHECK6-NEXT: [[TMP36:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 +// CHECK6-NEXT: [[IDXPROM24:%.*]] = sext i8 [[TMP36]] to i64 +// CHECK6-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds float, float* [[TMP35]], i64 [[IDXPROM24]] +// CHECK6-NEXT: [[TMP37:%.*]] = load float, float* [[ARRAYIDX25]], align 4, !llvm.access.group !11 +// CHECK6-NEXT: [[MUL26:%.*]] = fmul float [[MUL23]], [[TMP37]] +// CHECK6-NEXT: [[TMP38:%.*]] = load float*, float** [[TMP4]], align 8, !llvm.access.group !11 +// CHECK6-NEXT: [[TMP39:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 +// CHECK6-NEXT: [[IDXPROM27:%.*]] = sext i8 [[TMP39]] to i64 +// CHECK6-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds float, float* [[TMP38]], i64 [[IDXPROM27]] // CHECK6-NEXT: store float [[MUL26]], float* [[ARRAYIDX28]], align 4, !llvm.access.group !11 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 -// CHECK6-NEXT: [[ADD29:%.*]] = add nsw i64 [[TMP34]], 1 +// CHECK6-NEXT: [[TMP40:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 +// CHECK6-NEXT: [[ADD29:%.*]] = add nsw i64 [[TMP40]], 1 // CHECK6-NEXT: store i64 [[ADD29]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK6: omp.inner.for.end: @@ -4422,24 +4669,30 @@ // CHECK6-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 // CHECK6-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 // CHECK6-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK6-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK6-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK6-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK6-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 // CHECK6-NEXT: store i32 0, i32* [[X]], align 4 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..7 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]) +// CHECK6-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store float** [[A_ADDR]], float*** [[TMP0]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store float** [[B_ADDR]], float*** [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store float** [[C_ADDR]], float*** [[TMP2]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK6-NEXT: store float** [[D_ADDR]], float*** [[TMP3]], align 8 +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca float**, align 8 -// CHECK6-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 -// CHECK6-NEXT: [[D_ADDR:%.*]] = alloca float**, align 8 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK6-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -4451,78 +4704,80 @@ // CHECK6-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK6-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK6-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 -// CHECK6-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK6-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8 +// CHECK6-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load float**, float*** [[TMP5]], align 8 +// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 3 +// CHECK6-NEXT: [[TMP8:%.*]] = load float**, float*** [[TMP7]], align 8 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 199, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK6-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 1073741861, i32 0, i32 199, i32 1, i32 1) +// CHECK6-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK6-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 1073741861, i32 0, i32 199, i32 1, i32 1) // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK6-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK6-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK6-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 20 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK6-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP15]], 20 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 48, [[MUL]] // CHECK6-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i8 // CHECK6-NEXT: store i8 [[CONV]], i8* [[I]], align 1, !llvm.access.group !14 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK6-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP12]], 20 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK6-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP17]], 20 // CHECK6-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 20 -// CHECK6-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL3]] +// CHECK6-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], [[MUL3]] // CHECK6-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK6-NEXT: [[ADD5:%.*]] = add nsw i32 -10, [[MUL4]] // CHECK6-NEXT: store i32 [[ADD5]], i32* [[X]], align 4, !llvm.access.group !14 -// CHECK6-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !14 -// CHECK6-NEXT: [[TMP14:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 -// CHECK6-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP14]] to i64 -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 [[IDXPROM]] -// CHECK6-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !14 -// CHECK6-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !14 -// CHECK6-NEXT: [[TMP17:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 -// CHECK6-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP17]] to i64 -// CHECK6-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[IDXPROM6]] -// CHECK6-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !14 -// CHECK6-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK6-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !14 -// CHECK6-NEXT: [[TMP20:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 -// CHECK6-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP20]] to i64 -// CHECK6-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[IDXPROM9]] -// CHECK6-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !14 -// CHECK6-NEXT: [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP21]] -// CHECK6-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !14 -// CHECK6-NEXT: [[TMP23:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 -// CHECK6-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP23]] to i64 -// CHECK6-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[IDXPROM12]] +// CHECK6-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP4]], align 8, !llvm.access.group !14 +// CHECK6-NEXT: [[TMP19:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 +// CHECK6-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP19]] to i64 +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM]] +// CHECK6-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !14 +// CHECK6-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP6]], align 8, !llvm.access.group !14 +// CHECK6-NEXT: [[TMP22:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 +// CHECK6-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP22]] to i64 +// CHECK6-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM6]] +// CHECK6-NEXT: [[TMP23:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !14 +// CHECK6-NEXT: [[MUL8:%.*]] = fmul float [[TMP20]], [[TMP23]] +// CHECK6-NEXT: [[TMP24:%.*]] = load float*, float** [[TMP8]], align 8, !llvm.access.group !14 +// CHECK6-NEXT: [[TMP25:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 +// CHECK6-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP25]] to i64 +// CHECK6-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP24]], i64 [[IDXPROM9]] +// CHECK6-NEXT: [[TMP26:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !14 +// CHECK6-NEXT: [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP26]] +// CHECK6-NEXT: [[TMP27:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !14 +// CHECK6-NEXT: [[TMP28:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 +// CHECK6-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP28]] to i64 +// CHECK6-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[TMP27]], i64 [[IDXPROM12]] // CHECK6-NEXT: store float [[MUL11]], float* [[ARRAYIDX13]], align 4, !llvm.access.group !14 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK6-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK6: omp.inner.for.end: @@ -4547,7 +4802,7 @@ // CHECK6-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 // CHECK6-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 -// CHECK6-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK6-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK6-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 @@ -4556,24 +4811,26 @@ // CHECK6-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8 // CHECK6-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 16 // CHECK6-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK6-NEXT: store i32 [[TMP3]], i32* [[CONV]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, i64, i64)* @.omp_outlined..8 to void (i32*, i32*, ...)*), float** [[A_ADDR]], i64 [[TMP1]], i64 [[TMP4]]) -// CHECK6-NEXT: [[TMP5:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK6-NEXT: call void @llvm.stackrestore(i8* [[TMP5]]) +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store float** [[A_ADDR]], float*** [[TMP3]], align 8 +// CHECK6-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store i64 [[TMP1]], i64* [[TMP4]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK6-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 8 +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK6-NEXT: [[TMP7:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK6-NEXT: call void @llvm.stackrestore(i8* [[TMP7]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[N:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK6-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK6-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 +// CHECK6-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4585,99 +4842,102 @@ // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK6-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK6-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK6-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* +// CHECK6-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK6-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 16908288, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave() -// CHECK6-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8 -// CHECK6-NEXT: [[VLA1:%.*]] = alloca float, i64 [[TMP1]], align 16 -// CHECK6-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK6-NEXT: [[TMP7:%.*]] = call i8* @llvm.stacksave() +// CHECK6-NEXT: store i8* [[TMP7]], i8** [[SAVED_STACK]], align 8 +// CHECK6-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP4]], align 16 +// CHECK6-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR0]], align 8 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], 16908288 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP10]], 16908288 // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP8]], [[TMP9]] -// CHECK6-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP13]], [[TMP14]] +// CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] // CHECK6: omp.dispatch.cleanup: // CHECK6-NEXT: br label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP3:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] -// CHECK6-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP15]], [[TMP16]] +// CHECK6-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK6: omp.inner.for.cond.cleanup: // CHECK6-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[MUL:%.*]] = mul i32 [[TMP12]], 127 +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[MUL:%.*]] = mul i32 [[TMP17]], 127 // CHECK6-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK6-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3foov() -// CHECK6-NEXT: [[CONV4:%.*]] = sitofp i32 [[CALL]] to float -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[VLA1]], i64 [[IDXPROM]] -// CHECK6-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK6-NEXT: [[ADD5:%.*]] = fadd float [[CONV4]], [[TMP14]] -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK6-NEXT: [[CONV6:%.*]] = sitofp i32 [[TMP15]] to float -// CHECK6-NEXT: [[ADD7:%.*]] = fadd float [[ADD5]], [[CONV6]] -// CHECK6-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP0]], align 8 -// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM8:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK6-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[IDXPROM8]] -// CHECK6-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX9]], align 4 -// CHECK6-NEXT: [[ADD10:%.*]] = fadd float [[TMP18]], [[ADD7]] -// CHECK6-NEXT: store float [[ADD10]], float* [[ARRAYIDX9]], align 4 +// CHECK6-NEXT: [[CONV:%.*]] = sitofp i32 [[CALL]] to float +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[VLA]], i64 [[IDXPROM]] +// CHECK6-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK6-NEXT: [[ADD3:%.*]] = fadd float [[CONV]], [[TMP19]] +// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[N]], align 4 +// CHECK6-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP20]] to float +// CHECK6-NEXT: [[ADD5:%.*]] = fadd float [[ADD3]], [[CONV4]] +// CHECK6-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP2]], align 8 +// CHECK6-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK6-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM6]] +// CHECK6-NEXT: [[TMP23:%.*]] = load float, float* [[ARRAYIDX7]], align 4 +// CHECK6-NEXT: [[ADD8:%.*]] = fadd float [[TMP23]], [[ADD5]] +// CHECK6-NEXT: store float [[ADD8]], float* [[ARRAYIDX7]], align 4 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[ADD11:%.*]] = add i32 [[TMP19]], 1 -// CHECK6-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[ADD9:%.*]] = add i32 [[TMP24]], 1 +// CHECK6-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: -// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD12:%.*]] = add i32 [[TMP20]], [[TMP21]] -// CHECK6-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD13:%.*]] = add i32 [[TMP22]], [[TMP23]] -// CHECK6-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[ADD10:%.*]] = add i32 [[TMP25]], [[TMP26]] +// CHECK6-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[ADD11:%.*]] = add i32 [[TMP27]], [[TMP28]] +// CHECK6-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK6: omp.dispatch.end: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK6-NEXT: [[TMP24:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK6-NEXT: call void @llvm.stackrestore(i8* [[TMP24]]) +// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]]) +// CHECK6-NEXT: [[TMP29:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK6-NEXT: call void @llvm.stackrestore(i8* [[TMP29]]) // CHECK6-NEXT: ret void // // @@ -4796,18 +5056,21 @@ // CHECK11-SAME: () #[[ATTR3:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[ARR:%.*]] = alloca [10 x i32], align 16 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK11-NEXT: [[TMP0:%.*]] = bitcast [10 x i32]* [[ARR]] to i8* // CHECK11-NEXT: call void @llvm.memset.p0i8.i64(i8* align 16 [[TMP0]], i8 0, i64 40, i1 false) -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), [10 x i32]* [[ARR]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [10 x i32]* [[ARR]], [10 x i32]** [[TMP1]], align 8 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[ARR:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[ARR_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[__RANGE1:%.*]] = alloca [10 x i32]*, align 8 @@ -4824,22 +5087,24 @@ // CHECK11-NEXT: [[A:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store [10 x i32]* [[ARR]], [10 x i32]** [[ARR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[ARR_ADDR]], align 8 -// CHECK11-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[__RANGE1]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[__RANGE1]], align 8 -// CHECK11-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP1]], i64 0, i64 0 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 +// CHECK11-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[__RANGE1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[__RANGE1]], align 8 +// CHECK11-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP3]], i64 0, i64 0 // CHECK11-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAYDECAY]], i64 10 // CHECK11-NEXT: store i32* [[ADD_PTR]], i32** [[__END1]], align 8 -// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[__RANGE1]], align 8 -// CHECK11-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i64 0, i64 0 +// CHECK11-NEXT: [[TMP4:%.*]] = load [10 x i32]*, [10 x i32]** [[__RANGE1]], align 8 +// CHECK11-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i64 0, i64 0 // CHECK11-NEXT: store i32* [[ARRAYDECAY1]], i32** [[DOTCAPTURE_EXPR_]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[__END1]], align 8 -// CHECK11-NEXT: store i32* [[TMP3]], i32** [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_]], align 8 -// CHECK11-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint i32* [[TMP4]] to i64 -// CHECK11-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint i32* [[TMP5]] to i64 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32*, i32** [[__END1]], align 8 +// CHECK11-NEXT: store i32* [[TMP5]], i32** [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_]], align 8 +// CHECK11-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint i32* [[TMP6]] to i64 +// CHECK11-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint i32* [[TMP7]] to i64 // CHECK11-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] // CHECK11-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 4 // CHECK11-NEXT: [[SUB:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 @@ -4847,65 +5112,65 @@ // CHECK11-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD]], 1 // CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i64 [[DIV]], 1 // CHECK11-NEXT: store i64 [[SUB4]], i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_]], align 8 -// CHECK11-NEXT: store i32* [[TMP6]], i32** [[__BEGIN1]], align 8 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_]], align 8 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK11-NEXT: [[CMP:%.*]] = icmp ult i32* [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_]], align 8 +// CHECK11-NEXT: store i32* [[TMP8]], i32** [[__BEGIN1]], align 8 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK11-NEXT: [[CMP:%.*]] = icmp ult i32* [[TMP9]], [[TMP10]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK11-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP13]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP16]], i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP18]], i64* [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_]], align 8 -// CHECK11-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP20]], 1 -// CHECK11-NEXT: [[ADD_PTR8:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i64 [[MUL]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP22]], 1 +// CHECK11-NEXT: [[ADD_PTR8:%.*]] = getelementptr inbounds i32, i32* [[TMP21]], i64 [[MUL]] // CHECK11-NEXT: store i32* [[ADD_PTR8]], i32** [[__BEGIN15]], align 8 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[__BEGIN15]], align 8 -// CHECK11-NEXT: store i32* [[TMP21]], i32** [[A]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32*, i32** [[A]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[__BEGIN15]], align 8 +// CHECK11-NEXT: store i32* [[TMP23]], i32** [[A]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32*, i32** [[A]], align 8 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i64 [[TMP23]], 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i64 [[TMP25]], 1 // CHECK11-NEXT: store i64 [[ADD9]], i64* [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) +// CHECK11-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -4915,18 +5180,21 @@ // CHECK11-SAME: () #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[ARR:%.*]] = alloca [10 x i32], align 16 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK11-NEXT: [[TMP0:%.*]] = bitcast [10 x i32]* [[ARR]] to i8* // CHECK11-NEXT: call void @llvm.memset.p0i8.i64(i8* align 16 [[TMP0]], i8 0, i64 40, i1 false) -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [10 x i32]* [[ARR]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [10 x i32]* [[ARR]], [10 x i32]** [[TMP1]], align 8 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[ARR:%.*]]) #[[ATTR5]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[ARR_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 @@ -4951,41 +5219,43 @@ // CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store [10 x i32]* [[ARR]], [10 x i32]** [[ARR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[ARR_ADDR]], align 8 -// CHECK11-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[__RANGE1]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[__RANGE1]], align 8 -// CHECK11-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP1]], i64 0, i64 0 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 +// CHECK11-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[__RANGE1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[__RANGE1]], align 8 +// CHECK11-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP3]], i64 0, i64 0 // CHECK11-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAYDECAY]], i64 10 // CHECK11-NEXT: store i32* [[ADD_PTR]], i32** [[__END1]], align 8 -// CHECK11-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[__RANGE2]], align 8 -// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[__RANGE2]], align 8 -// CHECK11-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i64 0, i64 0 +// CHECK11-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[__RANGE2]], align 8 +// CHECK11-NEXT: [[TMP4:%.*]] = load [10 x i32]*, [10 x i32]** [[__RANGE2]], align 8 +// CHECK11-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i64 0, i64 0 // CHECK11-NEXT: [[ADD_PTR3:%.*]] = getelementptr inbounds i32, i32* [[ARRAYDECAY2]], i64 10 // CHECK11-NEXT: store i32* [[ADD_PTR3]], i32** [[__END2]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[__RANGE1]], align 8 -// CHECK11-NEXT: [[ARRAYDECAY4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP3]], i64 0, i64 0 +// CHECK11-NEXT: [[TMP5:%.*]] = load [10 x i32]*, [10 x i32]** [[__RANGE1]], align 8 +// CHECK11-NEXT: [[ARRAYDECAY4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP5]], i64 0, i64 0 // CHECK11-NEXT: store i32* [[ARRAYDECAY4]], i32** [[DOTCAPTURE_EXPR_]], align 8 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[__END1]], align 8 -// CHECK11-NEXT: store i32* [[TMP4]], i32** [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[TMP5:%.*]] = load [10 x i32]*, [10 x i32]** [[__RANGE2]], align 8 -// CHECK11-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP5]], i64 0, i64 0 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[__END1]], align 8 +// CHECK11-NEXT: store i32* [[TMP6]], i32** [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP7:%.*]] = load [10 x i32]*, [10 x i32]** [[__RANGE2]], align 8 +// CHECK11-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP7]], i64 0, i64 0 // CHECK11-NEXT: store i32* [[ARRAYDECAY7]], i32** [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[__END2]], align 8 -// CHECK11-NEXT: store i32* [[TMP6]], i32** [[DOTCAPTURE_EXPR_8]], align 8 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_]], align 8 -// CHECK11-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint i32* [[TMP7]] to i64 -// CHECK11-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint i32* [[TMP8]] to i64 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[__END2]], align 8 +// CHECK11-NEXT: store i32* [[TMP8]], i32** [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_]], align 8 +// CHECK11-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint i32* [[TMP9]] to i64 +// CHECK11-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint i32* [[TMP10]] to i64 // CHECK11-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] // CHECK11-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 4 // CHECK11-NEXT: [[SUB:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[SUB]], 1 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD]], 1 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_8]], align 8 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK11-NEXT: [[SUB_PTR_LHS_CAST10:%.*]] = ptrtoint i32* [[TMP9]] to i64 -// CHECK11-NEXT: [[SUB_PTR_RHS_CAST11:%.*]] = ptrtoint i32* [[TMP10]] to i64 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK11-NEXT: [[SUB_PTR_LHS_CAST10:%.*]] = ptrtoint i32* [[TMP11]] to i64 +// CHECK11-NEXT: [[SUB_PTR_RHS_CAST11:%.*]] = ptrtoint i32* [[TMP12]] to i64 // CHECK11-NEXT: [[SUB_PTR_SUB12:%.*]] = sub i64 [[SUB_PTR_LHS_CAST10]], [[SUB_PTR_RHS_CAST11]] // CHECK11-NEXT: [[SUB_PTR_DIV13:%.*]] = sdiv exact i64 [[SUB_PTR_SUB12]], 4 // CHECK11-NEXT: [[SUB14:%.*]] = sub nsw i64 [[SUB_PTR_DIV13]], 1 @@ -4994,84 +5264,84 @@ // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[DIV]], [[DIV16]] // CHECK11-NEXT: [[SUB17:%.*]] = sub nsw i64 [[MUL]], 1 // CHECK11-NEXT: store i64 [[SUB17]], i64* [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_]], align 8 -// CHECK11-NEXT: store i32* [[TMP11]], i32** [[__BEGIN1]], align 8 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK11-NEXT: store i32* [[TMP12]], i32** [[__BEGIN2]], align 8 // CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_]], align 8 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP:%.*]] = icmp ult i32* [[TMP13]], [[TMP14]] +// CHECK11-NEXT: store i32* [[TMP13]], i32** [[__BEGIN1]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK11-NEXT: store i32* [[TMP14]], i32** [[__BEGIN2]], align 8 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_]], align 8 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[CMP:%.*]] = icmp ult i32* [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_8]], align 8 -// CHECK11-NEXT: [[CMP18:%.*]] = icmp ult i32* [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK11-NEXT: [[CMP18:%.*]] = icmp ult i32* [[TMP17]], [[TMP18]] // CHECK11-NEXT: br i1 [[CMP18]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK11-NEXT: store i64 [[TMP17]], i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_9]], align 8 +// CHECK11-NEXT: store i64 [[TMP19]], i64* [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK11-NEXT: [[CMP21:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_9]], align 8 +// CHECK11-NEXT: [[CMP21:%.*]] = icmp sgt i64 [[TMP22]], [[TMP23]] // CHECK11-NEXT: br i1 [[CMP21]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_9]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_9]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP24]], [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP24]], i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP26]], i64* [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP22:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP22:%.*]] = icmp sle i64 [[TMP27]], [[TMP28]] // CHECK11-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_]], align 8 -// CHECK11-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_8]], align 8 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK11-NEXT: [[SUB_PTR_LHS_CAST23:%.*]] = ptrtoint i32* [[TMP29]] to i64 -// CHECK11-NEXT: [[SUB_PTR_RHS_CAST24:%.*]] = ptrtoint i32* [[TMP30]] to i64 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK11-NEXT: [[SUB_PTR_LHS_CAST23:%.*]] = ptrtoint i32* [[TMP31]] to i64 +// CHECK11-NEXT: [[SUB_PTR_RHS_CAST24:%.*]] = ptrtoint i32* [[TMP32]] to i64 // CHECK11-NEXT: [[SUB_PTR_SUB25:%.*]] = sub i64 [[SUB_PTR_LHS_CAST23]], [[SUB_PTR_RHS_CAST24]] // CHECK11-NEXT: [[SUB_PTR_DIV26:%.*]] = sdiv exact i64 [[SUB_PTR_SUB25]], 4 // CHECK11-NEXT: [[SUB27:%.*]] = sub nsw i64 [[SUB_PTR_DIV26]], 1 // CHECK11-NEXT: [[ADD28:%.*]] = add nsw i64 [[SUB27]], 1 // CHECK11-NEXT: [[DIV29:%.*]] = sdiv i64 [[ADD28]], 1 // CHECK11-NEXT: [[MUL30:%.*]] = mul nsw i64 1, [[DIV29]] -// CHECK11-NEXT: [[DIV31:%.*]] = sdiv i64 [[TMP28]], [[MUL30]] +// CHECK11-NEXT: [[DIV31:%.*]] = sdiv i64 [[TMP30]], [[MUL30]] // CHECK11-NEXT: [[MUL32:%.*]] = mul nsw i64 [[DIV31]], 1 -// CHECK11-NEXT: [[ADD_PTR33:%.*]] = getelementptr inbounds i32, i32* [[TMP27]], i64 [[MUL32]] +// CHECK11-NEXT: [[ADD_PTR33:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[MUL32]] // CHECK11-NEXT: store i32* [[ADD_PTR33]], i32** [[__BEGIN119]], align 8 -// CHECK11-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK11-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_8]], align 8 -// CHECK11-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK11-NEXT: [[SUB_PTR_LHS_CAST34:%.*]] = ptrtoint i32* [[TMP34]] to i64 -// CHECK11-NEXT: [[SUB_PTR_RHS_CAST35:%.*]] = ptrtoint i32* [[TMP35]] to i64 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK11-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP35:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK11-NEXT: [[SUB_PTR_LHS_CAST34:%.*]] = ptrtoint i32* [[TMP36]] to i64 +// CHECK11-NEXT: [[SUB_PTR_RHS_CAST35:%.*]] = ptrtoint i32* [[TMP37]] to i64 // CHECK11-NEXT: [[SUB_PTR_SUB36:%.*]] = sub i64 [[SUB_PTR_LHS_CAST34]], [[SUB_PTR_RHS_CAST35]] // CHECK11-NEXT: [[SUB_PTR_DIV37:%.*]] = sdiv exact i64 [[SUB_PTR_SUB36]], 4 // CHECK11-NEXT: [[SUB38:%.*]] = sub nsw i64 [[SUB_PTR_DIV37]], 1 // CHECK11-NEXT: [[ADD39:%.*]] = add nsw i64 [[SUB38]], 1 // CHECK11-NEXT: [[DIV40:%.*]] = sdiv i64 [[ADD39]], 1 // CHECK11-NEXT: [[MUL41:%.*]] = mul nsw i64 1, [[DIV40]] -// CHECK11-NEXT: [[DIV42:%.*]] = sdiv i64 [[TMP33]], [[MUL41]] -// CHECK11-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_8]], align 8 -// CHECK11-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK11-NEXT: [[SUB_PTR_LHS_CAST43:%.*]] = ptrtoint i32* [[TMP36]] to i64 -// CHECK11-NEXT: [[SUB_PTR_RHS_CAST44:%.*]] = ptrtoint i32* [[TMP37]] to i64 +// CHECK11-NEXT: [[DIV42:%.*]] = sdiv i64 [[TMP35]], [[MUL41]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK11-NEXT: [[SUB_PTR_LHS_CAST43:%.*]] = ptrtoint i32* [[TMP38]] to i64 +// CHECK11-NEXT: [[SUB_PTR_RHS_CAST44:%.*]] = ptrtoint i32* [[TMP39]] to i64 // CHECK11-NEXT: [[SUB_PTR_SUB45:%.*]] = sub i64 [[SUB_PTR_LHS_CAST43]], [[SUB_PTR_RHS_CAST44]] // CHECK11-NEXT: [[SUB_PTR_DIV46:%.*]] = sdiv exact i64 [[SUB_PTR_SUB45]], 4 // CHECK11-NEXT: [[SUB47:%.*]] = sub nsw i64 [[SUB_PTR_DIV46]], 1 @@ -5079,32 +5349,32 @@ // CHECK11-NEXT: [[DIV49:%.*]] = sdiv i64 [[ADD48]], 1 // CHECK11-NEXT: [[MUL50:%.*]] = mul nsw i64 1, [[DIV49]] // CHECK11-NEXT: [[MUL51:%.*]] = mul nsw i64 [[DIV42]], [[MUL50]] -// CHECK11-NEXT: [[SUB52:%.*]] = sub nsw i64 [[TMP32]], [[MUL51]] +// CHECK11-NEXT: [[SUB52:%.*]] = sub nsw i64 [[TMP34]], [[MUL51]] // CHECK11-NEXT: [[MUL53:%.*]] = mul nsw i64 [[SUB52]], 1 -// CHECK11-NEXT: [[ADD_PTR54:%.*]] = getelementptr inbounds i32, i32* [[TMP31]], i64 [[MUL53]] +// CHECK11-NEXT: [[ADD_PTR54:%.*]] = getelementptr inbounds i32, i32* [[TMP33]], i64 [[MUL53]] // CHECK11-NEXT: store i32* [[ADD_PTR54]], i32** [[__BEGIN220]], align 8 -// CHECK11-NEXT: [[TMP38:%.*]] = load i32*, i32** [[__BEGIN119]], align 8 -// CHECK11-NEXT: store i32* [[TMP38]], i32** [[A]], align 8 -// CHECK11-NEXT: [[TMP39:%.*]] = load i32*, i32** [[__BEGIN220]], align 8 -// CHECK11-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 -// CHECK11-NEXT: store i32 [[TMP40]], i32* [[B]], align 4 -// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[B]], align 4 -// CHECK11-NEXT: [[TMP42:%.*]] = load i32*, i32** [[A]], align 8 -// CHECK11-NEXT: store i32 [[TMP41]], i32* [[TMP42]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = load i32*, i32** [[__BEGIN119]], align 8 +// CHECK11-NEXT: store i32* [[TMP40]], i32** [[A]], align 8 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32*, i32** [[__BEGIN220]], align 8 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[TMP41]], align 4 +// CHECK11-NEXT: store i32 [[TMP42]], i32* [[B]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, i32* [[B]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32*, i32** [[A]], align 8 +// CHECK11-NEXT: store i32 [[TMP43]], i32* [[TMP44]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP43:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[ADD55:%.*]] = add nsw i64 [[TMP43]], 1 +// CHECK11-NEXT: [[TMP45:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[ADD55:%.*]] = add nsw i64 [[TMP45]], 1 // CHECK11-NEXT: store i64 [[ADD55]], i64* [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP45]]) +// CHECK11-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void diff --git a/clang/test/OpenMP/parallel_for_lastprivate_conditional.cpp b/clang/test/OpenMP/parallel_for_lastprivate_conditional.cpp --- a/clang/test/OpenMP/parallel_for_lastprivate_conditional.cpp +++ b/clang/test/OpenMP/parallel_for_lastprivate_conditional.cpp @@ -38,128 +38,143 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: store i32 0, i32* [[A]], align 4 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[A]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[A]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[A1:%.*]] = alloca [[STRUCT_LASPRIVATE_CONDITIONAL:%.*]], align 4 +// CHECK1-NEXT: [[A:%.*]] = alloca [[STRUCT_LASPRIVATE_CONDITIONAL:%.*]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_7:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], %struct.lasprivate.conditional* [[A1]], i32 0, i32 1 -// CHECK1-NEXT: store i8 0, i8* [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], %struct.lasprivate.conditional* [[A1]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], %struct.lasprivate.conditional* [[A]], i32 0, i32 1 +// CHECK1-NEXT: store i8 0, i8* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], %struct.lasprivate.conditional* [[A]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP11]], 5 -// CHECK1-NEXT: br i1 [[CMP3]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP13]], 5 +// CHECK1-NEXT: br i1 [[CMP2]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] // CHECK1: if.then: -// CHECK1-NEXT: store i32 0, i32* [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* @.{{pl_cond[.].+[.|,]}} align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp sle i32 [[TMP13]], [[TMP12]] -// CHECK1-NEXT: br i1 [[TMP14]], label [[LP_COND_THEN:%.*]], label [[LP_COND_EXIT:%.*]] +// CHECK1-NEXT: store i32 0, i32* [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], [8 x i32]* @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* @.{{pl_cond[.].+[.|,]}} align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = icmp sle i32 [[TMP15]], [[TMP14]] +// CHECK1-NEXT: br i1 [[TMP16]], label [[LP_COND_THEN:%.*]], label [[LP_COND_EXIT:%.*]] // CHECK1: lp_cond_then: -// CHECK1-NEXT: store i32 [[TMP12]], i32* @.{{pl_cond[.].+[.|,]}} align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: store i32 [[TMP15]], i32* @{{pl_cond[.].+[.|,]}} align 4 +// CHECK1-NEXT: store i32 [[TMP14]], i32* @.{{pl_cond[.].+[.|,]}} align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], i32* @{{pl_cond[.].+[.|,]}} align 4 // CHECK1-NEXT: br label [[LP_COND_EXIT]] // CHECK1: lp_cond_exit: -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 10) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[TMP2]], i32* [[I]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* @.{{pl_cond[.].+[.|,]}} align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = icmp sle i32 [[TMP17]], [[TMP16]] -// CHECK1-NEXT: br i1 [[TMP18]], label [[LP_COND_THEN4:%.*]], label [[LP_COND_EXIT5:%.*]] -// CHECK1: lp_cond_then4: -// CHECK1-NEXT: store i32 [[TMP16]], i32* @.{{pl_cond[.].+[.|,]}} align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: store i32 [[TMP19]], i32* @{{pl_cond[.].+[.|,]}} align 4 -// CHECK1-NEXT: br label [[LP_COND_EXIT5]] -// CHECK1: lp_cond_exit5: -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP20]] monotonic, align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* @.{{pl_cond[.].+[.|,]}} align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = icmp sle i32 [[TMP23]], [[TMP22]] -// CHECK1-NEXT: br i1 [[TMP24]], label [[LP_COND_THEN6:%.*]], label [[LP_COND_EXIT7:%.*]] -// CHECK1: lp_cond_then6: -// CHECK1-NEXT: store i32 [[TMP22]], i32* @.{{pl_cond[.].+[.|,]}} align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: store i32 [[TMP25]], i32* @{{pl_cond[.].+[.|,]}} align 4 -// CHECK1-NEXT: br label [[LP_COND_EXIT7]] -// CHECK1: lp_cond_exit7: -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 10) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[TMP2]], i32* [[I]]) -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], %struct.lasprivate.conditional* [[A1]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP27:%.*]] = load i8, i8* [[TMP26]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i8 [[TMP27]], 0 -// CHECK1-NEXT: br i1 [[TMP28]], label [[LPC_THEN:%.*]], label [[LPC_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], [8 x i32]* @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 10) +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[TMP4]], i32** [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[I]], i32** [[TMP19]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], [8 x i32]* @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* @.{{pl_cond[.].+[.|,]}} align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = icmp sle i32 [[TMP21]], [[TMP20]] +// CHECK1-NEXT: br i1 [[TMP22]], label [[LP_COND_THEN3:%.*]], label [[LP_COND_EXIT4:%.*]] +// CHECK1: lp_cond_then3: +// CHECK1-NEXT: store i32 [[TMP20]], i32* @.{{pl_cond[.].+[.|,]}} align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP23]], i32* @{{pl_cond[.].+[.|,]}} align 4 +// CHECK1-NEXT: br label [[LP_COND_EXIT4]] +// CHECK1: lp_cond_exit4: +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], [8 x i32]* @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = atomicrmw add i32* [[TMP4]], i32 [[TMP24]] monotonic, align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], [8 x i32]* @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* @.{{pl_cond[.].+[.|,]}} align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = icmp sle i32 [[TMP27]], [[TMP26]] +// CHECK1-NEXT: br i1 [[TMP28]], label [[LP_COND_THEN5:%.*]], label [[LP_COND_EXIT6:%.*]] +// CHECK1: lp_cond_then5: +// CHECK1-NEXT: store i32 [[TMP26]], i32* @.{{pl_cond[.].+[.|,]}} align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], i32* @{{pl_cond[.].+[.|,]}} align 4 +// CHECK1-NEXT: br label [[LP_COND_EXIT6]] +// CHECK1: lp_cond_exit6: +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], [8 x i32]* @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 10) +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_7]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[TMP4]], i32** [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_7]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[I]], i32** [[TMP31]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_7]]) +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], %struct.lasprivate.conditional* [[A]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP33:%.*]] = load i8, i8* [[TMP32]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = icmp ne i8 [[TMP33]], 0 +// CHECK1-NEXT: br i1 [[TMP34]], label [[LPC_THEN:%.*]], label [[LPC_DONE:%.*]] // CHECK1: lpc.then: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* @.{{pl_cond[.].+[.|,]}} align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = icmp sle i32 [[TMP30]], [[TMP29]] -// CHECK1-NEXT: br i1 [[TMP31]], label [[LP_COND_THEN8:%.*]], label [[LP_COND_EXIT9:%.*]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], [8 x i32]* @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* @.{{pl_cond[.].+[.|,]}} align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = icmp sle i32 [[TMP36]], [[TMP35]] +// CHECK1-NEXT: br i1 [[TMP37]], label [[LP_COND_THEN8:%.*]], label [[LP_COND_EXIT9:%.*]] // CHECK1: lp_cond_then8: -// CHECK1-NEXT: store i32 [[TMP29]], i32* @.{{pl_cond[.].+[.|,]}} align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: store i32 [[TMP32]], i32* @{{pl_cond[.].+[.|,]}} align 4 +// CHECK1-NEXT: store i32 [[TMP35]], i32* @.{{pl_cond[.].+[.|,]}} align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP38]], i32* @{{pl_cond[.].+[.|,]}} align 4 // CHECK1-NEXT: br label [[LP_COND_EXIT9]] // CHECK1: lp_cond_exit9: -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], [8 x i32]* @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) // CHECK1-NEXT: br label [[LPC_DONE]] // CHECK1: lpc.done: // CHECK1-NEXT: br label [[IF_END]] @@ -168,69 +183,70 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP39]], 1 // CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP4]]) -// CHECK1-NEXT: br i1 [[TMP35]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP6]]) +// CHECK1-NEXT: br i1 [[TMP41]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* @{{pl_cond[.].+[.|,]}} align 4 -// CHECK1-NEXT: store i32 [[TMP36]], i32* [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: store i32 [[TMP37]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* @{{pl_cond[.].+[.|,]}} align 4 +// CHECK1-NEXT: store i32 [[TMP42]], i32* [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP43]], i32* [[TMP2]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[I_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[A1]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP2]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[A1]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i32* [[A1]] to i8* -// CHECK1-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP7]], i32 1, i64 8, i8* [[TMP8]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP5]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i32* [[A]] to i8* +// CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP10]], i32 1, i64 8, i8* [[TMP11]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP12]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: store i32 [[ADD2]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: store i32 [[ADD1]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP12]] monotonic, align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP15]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -261,22 +277,23 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP2]] monotonic, align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to %struct.lasprivate.conditional* -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL:%.*]], %struct.lasprivate.conditional* [[TMP4]], i32 0, i32 1 -// CHECK1-NEXT: store atomic volatile i8 1, i8* [[TMP5]] unordered, align 1 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP5]] monotonic, align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP2]] to %struct.lasprivate.conditional* +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL:%.*]], %struct.lasprivate.conditional* [[TMP7]], i32 0, i32 1 +// CHECK1-NEXT: store atomic volatile i8 1, i8* [[TMP8]] unordered, align 1 // CHECK1-NEXT: ret void // diff --git a/clang/test/OpenMP/parallel_for_linear_codegen.cpp b/clang/test/OpenMP/parallel_for_linear_codegen.cpp --- a/clang/test/OpenMP/parallel_for_linear_codegen.cpp +++ b/clang/test/OpenMP/parallel_for_linear_codegen.cpp @@ -93,17 +93,22 @@ // CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK1-NEXT: [[PVAR:%.*]] = alloca float*, align 8 // CHECK1-NEXT: [[LVAR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TEST]], i32 0, i32 0 // CHECK1-NEXT: store float* [[F]], float** [[PVAR]], align 8 // CHECK1-NEXT: store i64 0, i64* [[LVAR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, i64*)* @.omp_outlined. to void (i32*, i32*, ...)*), float** [[PVAR]], i64* [[LVAR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store float** [[PVAR]], float*** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[LVAR]], i64** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK1-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4:[0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP0]] +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP2]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ -117,12 +122,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[PVAR:%.*]], i64* noundef nonnull align 8 dereferenceable(8) [[LVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[PVAR_ADDR:%.*]] = alloca float**, align 8 -// CHECK1-NEXT: [[LVAR_ADDR:%.*]] = alloca i64*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTLINEAR_START:%.*]] = alloca float*, align 8 @@ -132,88 +136,90 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[PVAR2:%.*]] = alloca float*, align 8 -// CHECK1-NEXT: [[LVAR3:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[PVAR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[LVAR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store float** [[PVAR]], float*** [[PVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64* [[LVAR]], i64** [[LVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load float**, float*** [[PVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64*, i64** [[LVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load float*, float** [[TMP0]], align 8 -// CHECK1-NEXT: store float* [[TMP2]], float** [[DOTLINEAR_START]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 8 -// CHECK1-NEXT: store i64 [[TMP3]], i64* [[DOTLINEAR_START1]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load float*, float** [[TMP2]], align 8 +// CHECK1-NEXT: store float* [[TMP5]], float** [[DOTLINEAR_START]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: store i64 [[TMP6]], i64* [[DOTLINEAR_START1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP5]]) -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]]) +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load float*, float** [[DOTLINEAR_START]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP13]], 3 -// CHECK1-NEXT: [[IDX_EXT:%.*]] = sext i32 [[MUL5]] to i64 -// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[TMP12]], i64 [[IDX_EXT]] -// CHECK1-NEXT: store float* [[ADD_PTR]], float** [[PVAR2]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTLINEAR_START1]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP15]], 3 -// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[MUL6]] to i64 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i64 [[TMP14]], [[CONV]] -// CHECK1-NEXT: store i64 [[ADD7]], i64* [[LVAR3]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load float*, float** [[PVAR2]], align 8 -// CHECK1-NEXT: [[ADD_PTR8:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 3 -// CHECK1-NEXT: store float* [[ADD_PTR8]], float** [[PVAR2]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[LVAR3]], align 8 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i64 [[TMP17]], 3 -// CHECK1-NEXT: store i64 [[ADD9]], i64* [[LVAR3]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load float*, float** [[DOTLINEAR_START]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP16]], 3 +// CHECK1-NEXT: [[IDX_EXT:%.*]] = sext i32 [[MUL3]] to i64 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[TMP15]], i64 [[IDX_EXT]] +// CHECK1-NEXT: store float* [[ADD_PTR]], float** [[PVAR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTLINEAR_START1]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[TMP18]], 3 +// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[MUL4]] to i64 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i64 [[TMP17]], [[CONV]] +// CHECK1-NEXT: store i64 [[ADD5]], i64* [[LVAR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load float*, float** [[PVAR]], align 8 +// CHECK1-NEXT: [[ADD_PTR6:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 3 +// CHECK1-NEXT: store float* [[ADD_PTR6]], float** [[PVAR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[LVAR]], align 8 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i64 [[TMP20]], 3 +// CHECK1-NEXT: store i64 [[ADD7]], i64* [[LVAR]], align 8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: -// CHECK1-NEXT: [[TMP21:%.*]] = load float*, float** [[PVAR2]], align 8 -// CHECK1-NEXT: store float* [[TMP21]], float** [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i64, i64* [[LVAR3]], align 8 -// CHECK1-NEXT: store i64 [[TMP22]], i64* [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load float*, float** [[PVAR]], align 8 +// CHECK1-NEXT: store float* [[TMP24]], float** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, i64* [[LVAR]], align 8 +// CHECK1-NEXT: store i64 [[TMP25]], i64* [[TMP4]], align 8 // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: // CHECK1-NEXT: ret void @@ -225,11 +231,16 @@ // CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 // CHECK1-NEXT: [[PVAR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[LVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[TEST]], i32 0, i32 0 // CHECK1-NEXT: store i32* [[F]], i32** [[PVAR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[LVAR]], align 4 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32**, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32** [[PVAR]], i32* [[LVAR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32** [[PVAR]], i32*** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[LVAR]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK1-NEXT: ret i32 0 // @@ -266,12 +277,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[PVAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[LVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[PVAR_ADDR:%.*]] = alloca i32**, align 8 -// CHECK1-NEXT: [[LVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32*, align 8 @@ -281,87 +291,89 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[PVAR2:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[LVAR3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[PVAR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[LVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32** [[PVAR]], i32*** [[PVAR_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[LVAR]], i32** [[LVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32**, i32*** [[PVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[LVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP0]], align 8 -// CHECK1-NEXT: store i32* [[TMP2]], i32** [[DOTLINEAR_START]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[DOTLINEAR_START1]], align 4 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP2]], align 8 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTLINEAR_START]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTLINEAR_START1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTLINEAR_START]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: [[IDX_EXT:%.*]] = sext i32 [[MUL5]] to i64 -// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[TMP12]], i64 [[IDX_EXT]] -// CHECK1-NEXT: store i32* [[ADD_PTR]], i32** [[PVAR2]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP15]], 1 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], [[MUL6]] -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[LVAR3]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[PVAR2]], align 8 -// CHECK1-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[TMP16]], i32 1 -// CHECK1-NEXT: store i32* [[INCDEC_PTR]], i32** [[PVAR2]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[LVAR3]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK1-NEXT: store i32 [[INC]], i32* [[LVAR3]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTLINEAR_START]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[IDX_EXT:%.*]] = sext i32 [[MUL3]] to i64 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[TMP15]], i64 [[IDX_EXT]] +// CHECK1-NEXT: store i32* [[ADD_PTR]], i32** [[PVAR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[MUL4]] +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[LVAR]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[PVAR]], align 8 +// CHECK1-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i32 1 +// CHECK1-NEXT: store i32* [[INCDEC_PTR]], i32** [[PVAR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[LVAR]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: store i32 [[INC]], i32* [[LVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32*, i32** [[PVAR2]], align 8 -// CHECK1-NEXT: store i32* [[TMP21]], i32** [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[LVAR3]], align 4 -// CHECK1-NEXT: store i32 [[TMP22]], i32* [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32*, i32** [[PVAR]], align 8 +// CHECK1-NEXT: store i32* [[TMP24]], i32** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[LVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], i32* [[TMP4]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: // CHECK1-NEXT: ret void @@ -417,11 +429,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[G:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -430,75 +442,77 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[G_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP3]], i32* [[DOTLINEAR_START]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP3]]) -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP5]]) +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP11]], 5 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], [[MUL3]] -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[G1]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[G1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP12]], 5 -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[G1]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store i32* [[G1]], i32** [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP13]], 5 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[MUL2]] +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[G]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[G]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 5 +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[G]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[G]], i32** [[TMP15]], align 8 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK3: .omp.linear.pu: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[G1]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[G]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[TMP2]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK3: .omp.linear.pu.done: // CHECK3-NEXT: ret void @@ -520,19 +534,22 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>*, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK4-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* // CHECK4-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>** [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* @g) +// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store i32* @g, i32** [[TMP0]], align 8 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[G:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -541,55 +558,57 @@ // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[G:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>, align 8 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[G_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START]], align 4 +// CHECK4-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK4-NEXT: store i32 [[TMP3]], i32* [[DOTLINEAR_START]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP3]]) -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP5]]) +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK4: omp.inner.for.body: // CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4: omp.inner.for.body: +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP11]], 5 -// CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], [[MUL3]] -// CHECK4-NEXT: store i32 [[ADD4]], i32* [[G1]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[G1]], align 4 -// CHECK4-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP12]], 5 -// CHECK4-NEXT: store i32 [[ADD5]], i32* [[G1]], align 4 -// CHECK4-NEXT: store i32 1, i32* [[G1]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP13]], 5 +// CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[MUL2]] +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[G]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[G]], align 4 +// CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 5 +// CHECK4-NEXT: store i32 [[ADD4]], i32* [[G]], align 4 +// CHECK4-NEXT: store i32 1, i32* [[G]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** [[BLOCK_ISA]], align 8 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* [[BLOCK]], i32 0, i32 1 @@ -601,33 +620,33 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp.1 to %struct.__block_descriptor*), %struct.__block_descriptor** [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP13:%.*]] = load volatile i32, i32* [[G1]], align 4 -// CHECK4-NEXT: store volatile i32 [[TMP13]], i32* [[BLOCK_CAPTURED]], align 8 -// CHECK4-NEXT: [[TMP14:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* [[BLOCK]] to void ()* -// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP14]] to %struct.__block_literal_generic* -// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP16:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* -// CHECK4-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP15]], align 8 -// CHECK4-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to void (i8*)* -// CHECK4-NEXT: call void [[TMP18]](i8* noundef [[TMP16]]) +// CHECK4-NEXT: [[TMP15:%.*]] = load volatile i32, i32* [[G]], align 4 +// CHECK4-NEXT: store volatile i32 [[TMP15]], i32* [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[TMP16:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* [[BLOCK]] to void ()* +// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP16]] to %struct.__block_literal_generic* +// CHECK4-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP18:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* +// CHECK4-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP17]], align 8 +// CHECK4-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to void (i8*)* +// CHECK4-NEXT: call void [[TMP20]](i8* noundef [[TMP18]]) // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK4-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK4-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK4-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]]) +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK4-NEXT: br i1 [[TMP23]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK4: .omp.linear.pu: -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[G1]], align 4 -// CHECK4-NEXT: store i32 [[TMP22]], i32* [[TMP0]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[G]], align 4 +// CHECK4-NEXT: store i32 [[TMP24]], i32* [[TMP2]], align 4 // CHECK4-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK4: .omp.linear.pu.done: // CHECK4-NEXT: ret void diff --git a/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp --- a/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp @@ -40,291 +40,297 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8**)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i8** [[TMP0]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[ARGC_ADDR]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: store i8** [[TMP2]], i8*** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i8** noundef [[ARGV:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[_TMP5:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x i8*], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP28:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP27:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 // CHECK1-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 9, i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP1]], i64 0 -// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8*, i8** [[TMP5]], i64 9 -// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint i8* [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint i8* [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = add nuw i64 [[TMP10]], 1 -// CHECK1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = call i8* @llvm.stacksave() -// CHECK1-NEXT: store i8* [[TMP13]], i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP11]], align 16 -// CHECK1-NEXT: store i64 [[TMP11]], i64* [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[VLA]], [[TMP14]] +// CHECK1-NEXT: store i32 0, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP4]], i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8*, i8** [[TMP8]], i64 9 +// CHECK1-NEXT: [[TMP9:%.*]] = load i8*, i8** [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP9]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint i8* [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint i8* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP14:%.*]] = add nuw i64 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP16:%.*]] = call i8* @llvm.stacksave() +// CHECK1-NEXT: store i8* [[TMP16]], i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP14]], align 16 +// CHECK1-NEXT: store i64 [[TMP14]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[VLA]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP15:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint i8* [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint i8* [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP20]] -// CHECK1-NEXT: store i8** [[_TMP6]], i8*** [[_TMP5]], align 8 -// CHECK1-NEXT: store i8* [[TMP21]], i8** [[_TMP6]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint i8* [[TMP19]] to i64 +// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint i8* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP22:%.*]] = sub i64 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP23:%.*]] = sdiv exact i64 [[TMP22]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP23]] +// CHECK1-NEXT: store i8** [[_TMP5]], i8*** [[_TMP4]], align 8 +// CHECK1-NEXT: store i8* [[TMP24]], i8** [[_TMP5]], align 8 // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i32* [[ARGC1]] to i8* -// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP25:%.*]] = bitcast i32* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, i64* [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init. to i8*), i8** [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store i8* null, i8** [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb. to i8*), i8** [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: [[TMP31:%.*]] = bitcast i32* [[TMP30]] to i8* -// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP31]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP33:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8*, i8** [[TMP33]], i64 0 -// CHECK1-NEXT: [[TMP34:%.*]] = load i8*, i8** [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, i8* [[TMP34]], i64 0 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = sext i32 [[TMP35]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP36]] -// CHECK1-NEXT: [[TMP37:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8*, i8** [[TMP37]], i64 9 -// CHECK1-NEXT: [[TMP38:%.*]] = load i8*, i8** [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, i8* [[TMP38]], i64 [[LB_ADD_LEN10]] -// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 1 -// CHECK1-NEXT: store i8* [[ARRAYIDX9]], i8** [[TMP39]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint i8* [[ARRAYIDX12]] to i64 -// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint i8* [[ARRAYIDX9]] to i64 -// CHECK1-NEXT: [[TMP42:%.*]] = sub i64 [[TMP40]], [[TMP41]] -// CHECK1-NEXT: [[TMP43:%.*]] = sdiv exact i64 [[TMP42]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP44:%.*]] = add nuw i64 [[TMP43]], 1 -// CHECK1-NEXT: [[TMP45:%.*]] = mul nuw i64 [[TMP44]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP45]], i64* [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 3 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init..1 to i8*), i8** [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 4 -// CHECK1-NEXT: store i8* null, i8** [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 5 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb..2 to i8*), i8** [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 6 -// CHECK1-NEXT: store i32 1, i32* [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[TMP51]], align 4 -// CHECK1-NEXT: [[TMP53:%.*]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]] to i8* -// CHECK1-NEXT: [[TMP54:%.*]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @[[GLOB1]], i32 [[TMP52]], i32 1, i32 2, i8* [[TMP53]]) -// CHECK1-NEXT: store i8* [[TMP54]], i8** [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: [[TMP55:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP56]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP57:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP57]], 9 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i32* [[ARGC]] to i8* +// CHECK1-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP28:%.*]] = bitcast i32* [[TMP2]] to i8* +// CHECK1-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, i64* [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init. to i8*), i8** [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store i8* null, i8** [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb. to i8*), i8** [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to i8* +// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP34]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP36:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8*, i8** [[TMP36]], i64 0 +// CHECK1-NEXT: [[TMP37:%.*]] = load i8*, i8** [[ARRAYIDX7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, i8* [[TMP37]], i64 0 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = sext i32 [[TMP38]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP39]] +// CHECK1-NEXT: [[TMP40:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i8*, i8** [[TMP40]], i64 9 +// CHECK1-NEXT: [[TMP41:%.*]] = load i8*, i8** [[ARRAYIDX10]], align 8 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, i8* [[TMP41]], i64 [[LB_ADD_LEN9]] +// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 +// CHECK1-NEXT: store i8* [[ARRAYIDX8]], i8** [[TMP42]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = ptrtoint i8* [[ARRAYIDX11]] to i64 +// CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint i8* [[ARRAYIDX8]] to i64 +// CHECK1-NEXT: [[TMP45:%.*]] = sub i64 [[TMP43]], [[TMP44]] +// CHECK1-NEXT: [[TMP46:%.*]] = sdiv exact i64 [[TMP45]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP47:%.*]] = add nuw i64 [[TMP46]], 1 +// CHECK1-NEXT: [[TMP48:%.*]] = mul nuw i64 [[TMP47]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP48]], i64* [[TMP49]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init..1 to i8*), i8** [[TMP50]], align 8 +// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 +// CHECK1-NEXT: store i8* null, i8** [[TMP51]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb..2 to i8*), i8** [[TMP52]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 +// CHECK1-NEXT: store i32 1, i32* [[TMP53]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, i32* [[TMP54]], align 4 +// CHECK1-NEXT: [[TMP56:%.*]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]] to i8* +// CHECK1-NEXT: [[TMP57:%.*]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @[[GLOB1]], i32 [[TMP55]], i32 1, i32 2, i8* [[TMP56]]) +// CHECK1-NEXT: store i8* [[TMP57]], i8** [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = load i32, i32* [[TMP58]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP59]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP60:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP60]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP58:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP58]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP61]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP59]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP62]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP60:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 [[TMP60]], [[TMP61]] -// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP63:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP63]], [[TMP64]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP62:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP62]], 1 +// CHECK1-NEXT: [[TMP65:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP65]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL]] // CHECK1-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store i8** [[DOTTASK_RED_]], i8*** [[TMP63]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[ARGC1]], i32** [[TMP64]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP66:%.*]] = load i8**, i8*** [[_TMP5]], align 8 -// CHECK1-NEXT: store i8** [[TMP66]], i8*** [[TMP65]], align 8 -// CHECK1-NEXT: [[TMP67:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP68:%.*]] = load i32, i32* [[TMP67]], align 4 -// CHECK1-NEXT: [[TMP69:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP68]], i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP70:%.*]] = bitcast i8* [[TMP69]] to %struct.kmp_task_t_with_privates* -// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP70]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP71]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP73:%.*]] = load i8*, i8** [[TMP72]], align 8 -// CHECK1-NEXT: [[TMP74:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP73]], i8* align 8 [[TMP74]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP70]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP75]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP77:%.*]] = load i8*, i8** [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: store i8* [[TMP77]], i8** [[TMP76]], align 8 -// CHECK1-NEXT: [[TMP78:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP79:%.*]] = load i32, i32* [[TMP78]], align 4 -// CHECK1-NEXT: [[TMP80:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP79]], i8* [[TMP69]]) +// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store i8** [[DOTTASK_RED_]], i8*** [[TMP66]], align 8 +// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[ARGC]], i32** [[TMP67]], align 8 +// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP69:%.*]] = load i8**, i8*** [[_TMP4]], align 8 +// CHECK1-NEXT: store i8** [[TMP69]], i8*** [[TMP68]], align 8 +// CHECK1-NEXT: [[TMP70:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP71:%.*]] = load i32, i32* [[TMP70]], align 4 +// CHECK1-NEXT: [[TMP72:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP71]], i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP73:%.*]] = bitcast i8* [[TMP72]] to %struct.kmp_task_t_with_privates* +// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP73]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP74]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP76:%.*]] = load i8*, i8** [[TMP75]], align 8 +// CHECK1-NEXT: [[TMP77:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP76]], i8* align 8 [[TMP77]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP73]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP78]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP80:%.*]] = load i8*, i8** [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: store i8* [[TMP80]], i8** [[TMP79]], align 8 +// CHECK1-NEXT: [[TMP81:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP82:%.*]] = load i32, i32* [[TMP81]], align 4 +// CHECK1-NEXT: [[TMP83:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP82]], i8* [[TMP72]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP81:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP81]], 1 -// CHECK1-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP84:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP84]], 1 +// CHECK1-NEXT: store i64 [[ADD13]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP82:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP83:%.*]] = load i32, i32* [[TMP82]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP83]]) -// CHECK1-NEXT: [[TMP84:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP85:%.*]] = load i32, i32* [[TMP84]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP85]], i32 1) -// CHECK1-NEXT: [[TMP86:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP87:%.*]] = bitcast i32* [[ARGC1]] to i8* -// CHECK1-NEXT: store i8* [[TMP87]], i8** [[TMP86]], align 8 -// CHECK1-NEXT: [[TMP88:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP88]], align 8 -// CHECK1-NEXT: [[TMP89:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP90:%.*]] = inttoptr i64 [[TMP11]] to i8* +// CHECK1-NEXT: [[TMP85:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP86:%.*]] = load i32, i32* [[TMP85]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP86]]) +// CHECK1-NEXT: [[TMP87:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP88:%.*]] = load i32, i32* [[TMP87]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP88]], i32 1) +// CHECK1-NEXT: [[TMP89:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP90:%.*]] = bitcast i32* [[ARGC]] to i8* // CHECK1-NEXT: store i8* [[TMP90]], i8** [[TMP89]], align 8 -// CHECK1-NEXT: [[TMP91:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP92:%.*]] = load i32, i32* [[TMP91]], align 4 -// CHECK1-NEXT: [[TMP93:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP94:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP92]], i32 2, i64 24, i8* [[TMP93]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP94]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP91]], align 8 +// CHECK1-NEXT: [[TMP92:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP93:%.*]] = inttoptr i64 [[TMP14]] to i8* +// CHECK1-NEXT: store i8* [[TMP93]], i8** [[TMP92]], align 8 +// CHECK1-NEXT: [[TMP94:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP95:%.*]] = load i32, i32* [[TMP94]], align 4 +// CHECK1-NEXT: [[TMP96:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP97:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP95]], i32 2, i64 24, i8* [[TMP96]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP97]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP95:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP96:%.*]] = load i32, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP95]], [[TMP96]] -// CHECK1-NEXT: store i32 [[ADD15]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP97:%.*]] = getelementptr i8, i8* [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[ARRAYIDX2]], [[TMP97]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE22:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP98:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP99:%.*]] = load i32, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP98]], [[TMP99]] +// CHECK1-NEXT: store i32 [[ADD14]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP100:%.*]] = getelementptr i8, i8* [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[ARRAYIDX1]], [[TMP100]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi i8* [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP98:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP98]] to i32 -// CHECK1-NEXT: [[TMP99:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP99]] to i32 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV]], [[CONV17]] -// CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK1-NEXT: store i8 [[CONV19]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi i8* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP101:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP101]] to i32 +// CHECK1-NEXT: [[TMP102:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV16:%.*]] = sext i8 [[TMP102]] to i32 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV]], [[CONV16]] +// CHECK1-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK1-NEXT: store i8 [[CONV18]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP97]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done22: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP92]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP100]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done21: +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP95]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP100:%.*]] = load i32, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP101:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP100]] monotonic, align 4 -// CHECK1-NEXT: [[TMP102:%.*]] = getelementptr i8, i8* [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY23:%.*]] = icmp eq i8* [[ARRAYIDX2]], [[TMP102]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY23]], label [[OMP_ARRAYCPY_DONE36:%.*]], label [[OMP_ARRAYCPY_BODY24:%.*]] -// CHECK1: omp.arraycpy.body24: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST25:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT34:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST26:%.*]] = phi i8* [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT33:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP103:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV27:%.*]] = sext i8 [[TMP103]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST26]] monotonic, align 1 +// CHECK1-NEXT: [[TMP103:%.*]] = load i32, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP104:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP103]] monotonic, align 4 +// CHECK1-NEXT: [[TMP105:%.*]] = getelementptr i8, i8* [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY22:%.*]] = icmp eq i8* [[ARRAYIDX1]], [[TMP105]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY22]], label [[OMP_ARRAYCPY_DONE35:%.*]], label [[OMP_ARRAYCPY_BODY23:%.*]] +// CHECK1: omp.arraycpy.body23: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST24:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT33:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST25:%.*]] = phi i8* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], [[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[TMP106:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 +// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP106]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST25]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP104:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY24]] ], [ [[TMP109:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP104]], i8* [[_TMP28]], align 1 -// CHECK1-NEXT: [[TMP105:%.*]] = load i8, i8* [[_TMP28]], align 1 -// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP105]] to i32 -// CHECK1-NEXT: [[TMP106:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV30:%.*]] = sext i8 [[TMP106]] to i32 -// CHECK1-NEXT: [[ADD31:%.*]] = add nsw i32 [[CONV29]], [[CONV30]] -// CHECK1-NEXT: [[CONV32:%.*]] = trunc i32 [[ADD31]] to i8 -// CHECK1-NEXT: store i8 [[CONV32]], i8* [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP107:%.*]] = load i8, i8* [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP108:%.*]] = cmpxchg i8* [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i8 [[TMP104]], i8 [[TMP107]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP109]] = extractvalue { i8, i1 } [[TMP108]], 0 -// CHECK1-NEXT: [[TMP110:%.*]] = extractvalue { i8, i1 } [[TMP108]], 1 -// CHECK1-NEXT: br i1 [[TMP110]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP107:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY23]] ], [ [[TMP112:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP107]], i8* [[_TMP27]], align 1 +// CHECK1-NEXT: [[TMP108:%.*]] = load i8, i8* [[_TMP27]], align 1 +// CHECK1-NEXT: [[CONV28:%.*]] = sext i8 [[TMP108]] to i32 +// CHECK1-NEXT: [[TMP109:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 +// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP109]] to i32 +// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV28]], [[CONV29]] +// CHECK1-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 +// CHECK1-NEXT: store i8 [[CONV31]], i8* [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP110:%.*]] = load i8, i8* [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP111:%.*]] = cmpxchg i8* [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i8 [[TMP107]], i8 [[TMP110]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP112]] = extractvalue { i8, i1 } [[TMP111]], 0 +// CHECK1-NEXT: [[TMP113:%.*]] = extractvalue { i8, i1 } [[TMP111]], 1 +// CHECK1-NEXT: br i1 [[TMP113]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT33]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT34]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST25]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE35:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT33]], [[TMP102]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_DONE36]], label [[OMP_ARRAYCPY_BODY24]] -// CHECK1: omp.arraycpy.done36: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT33]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST24]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE34:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP105]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_BODY23]] +// CHECK1: omp.arraycpy.done35: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP111:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP111]]) +// CHECK1-NEXT: [[TMP114:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP114]]) // CHECK1-NEXT: ret void // // @@ -434,7 +440,7 @@ // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[TMP_I:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[TMP4_I:%.*]] = alloca i8*, align 8 @@ -448,7 +454,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -461,29 +467,29 @@ // CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* // CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5:[0-9]+]] // CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* // CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP24:%.*]] = load i8**, i8*** [[TMP23]], align 8 // CHECK1-NEXT: [[TMP25:%.*]] = load i8*, i8** [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP26]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 // CHECK1-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP29]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP31:%.*]] = load i8**, i8*** [[TMP30]], align 8 // CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds i8*, i8** [[TMP31]], i64 9 // CHECK1-NEXT: [[TMP32:%.*]] = load i8*, i8** [[ARRAYIDX2_I]], align 8 @@ -494,10 +500,10 @@ // CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 // CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8 +// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8, !noalias !12 // CHECK1-NEXT: [[TMP39:%.*]] = load i8*, i8** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP40:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP39]], i8* [[TMP25]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP42:%.*]] = load i8**, i8*** [[TMP41]], align 8 // CHECK1-NEXT: [[TMP43:%.*]] = load i8*, i8** [[TMP42]], align 8 // CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint i8* [[TMP43]] to i64 diff --git a/clang/test/OpenMP/parallel_for_simd_aligned_codegen.cpp b/clang/test/OpenMP/parallel_for_simd_aligned_codegen.cpp --- a/clang/test/OpenMP/parallel_for_simd_aligned_codegen.cpp +++ b/clang/test/OpenMP/parallel_for_simd_aligned_codegen.cpp @@ -25,17 +25,20 @@ // CHECK1-SAME: (float* noundef [[C:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**)* @.omp_outlined. to void (i32*, i32*, ...)*), float** [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store float** [[C_ADDR]], float*** [[TMP0]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca float**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -45,56 +48,58 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load float**, float*** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load float*, float** [[TMP0]], align 8 -// CHECK1-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[TMP1]], i64 16) ] +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load float**, float*** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8 +// CHECK1-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[TMP3]], i64 16) ] // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !3 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !3 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !3 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 10, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/parallel_if_codegen.cpp b/clang/test/OpenMP/parallel_if_codegen.cpp --- a/clang/test/OpenMP/parallel_if_codegen.cpp +++ b/clang/test/OpenMP/parallel_if_codegen.cpp @@ -70,35 +70,43 @@ // CHECK1-LABEL: define {{[^@]+}}@_Z9gtid_testv // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..1(i32* [[TMP2]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..1(i32* [[TMP3]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: call void @_Z9gtid_testv() // CHECK1-NEXT: ret void // @@ -107,29 +115,32 @@ // CHECK1-SAME: () #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: [[DOTTHREADID_TEMP_3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]] +// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_1]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* @Arg, align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_2]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -// CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_1]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_1]], i32* [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]] +// CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_3]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR4]], align 4 +// CHECK1-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_3]], i32* [[DOTBOUND_ZERO_ADDR4]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_2]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: @@ -139,34 +150,43 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: call void @_Z3fn4v() // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: call void @_Z3fn5v() // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: call void @_Z3fn6v() // CHECK1-NEXT: ret void // @@ -175,29 +195,32 @@ // CHECK1-SAME: (i32 noundef [[ARG:%.*]]) #[[ATTR0]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 1 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 1 +// CHECK1-NEXT: [[DOTTHREADID_TEMP_3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i32 [[ARG]], i32* [[ARG_ADDR]], align 4 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]] +// CHECK1-NEXT: call void @.omp_outlined..6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_1]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARG_ADDR]], align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_2]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -// CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_1]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[DOTTHREADID_TEMP_1]], i32* [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]] +// CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_3]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR4]], align 4 +// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[DOTTHREADID_TEMP_3]], i32* [[DOTBOUND_ZERO_ADDR4]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_2]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: @@ -205,34 +228,43 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: call void @_Z3fn1v() // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: call void @_Z3fn2v() // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: call void @_Z3fn3v() // CHECK1-NEXT: ret void // diff --git a/clang/test/OpenMP/parallel_if_codegen_PR51349.cpp b/clang/test/OpenMP/parallel_if_codegen_PR51349.cpp --- a/clang/test/OpenMP/parallel_if_codegen_PR51349.cpp +++ b/clang/test/OpenMP/parallel_if_codegen_PR51349.cpp @@ -18,37 +18,45 @@ // CHECK-LABEL: define {{[^@]+}}@_Z3foov // CHECK-SAME: () #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA3:![0-9]+]] // CHECK-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2:[0-9]+]] // CHECK-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]]) // CHECK-NEXT: ret void // // // CHECK: Function Attrs: noinline norecurse nounwind // CHECK-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA7:![0-9]+]] // CHECK-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA7]] +// CHECK-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: ret void // // -// CHECK: Function Attrs: alwaysinline norecurse nounwind +// CHECK: Function Attrs: norecurse nounwind // CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA7]] // CHECK-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA7]] +// CHECK-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: ret void // // @@ -56,36 +64,44 @@ // CHECK-NOINLINE-LABEL: define {{[^@]+}}@_Z3foov // CHECK-NOINLINE-SAME: () #[[ATTR0:[0-9]+]] { // CHECK-NOINLINE-NEXT: entry: +// CHECK-NOINLINE-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK-NOINLINE-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-NOINLINE-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NOINLINE-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK-NOINLINE-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK-NOINLINE-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK-NOINLINE-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA3:![0-9]+]] // CHECK-NOINLINE-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK-NOINLINE-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] +// CHECK-NOINLINE-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2:[0-9]+]] // CHECK-NOINLINE-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -// CHECK-NOINLINE-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) +// CHECK-NOINLINE-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]]) // CHECK-NOINLINE-NEXT: ret void // // // CHECK-NOINLINE: Function Attrs: noinline norecurse nounwind // CHECK-NOINLINE-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK-NOINLINE-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-NOINLINE-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-NOINLINE-NEXT: entry: // CHECK-NOINLINE-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-NOINLINE-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NOINLINE-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK-NOINLINE-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA7:![0-9]+]] // CHECK-NOINLINE-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA7]] +// CHECK-NOINLINE-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8, !tbaa [[TBAA7]] +// CHECK-NOINLINE-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK-NOINLINE-NEXT: ret void // // -// CHECK-NOINLINE: Function Attrs: alwaysinline norecurse nounwind +// CHECK-NOINLINE: Function Attrs: noinline norecurse nounwind // CHECK-NOINLINE-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK-NOINLINE-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK-NOINLINE-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-NOINLINE-NEXT: entry: // CHECK-NOINLINE-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-NOINLINE-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NOINLINE-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK-NOINLINE-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA7]] // CHECK-NOINLINE-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA7]] +// CHECK-NOINLINE-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8, !tbaa [[TBAA7]] +// CHECK-NOINLINE-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 // CHECK-NOINLINE-NEXT: ret void // diff --git a/clang/test/OpenMP/parallel_master_codegen.cpp b/clang/test/OpenMP/parallel_master_codegen.cpp --- a/clang/test/OpenMP/parallel_master_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_codegen.cpp @@ -296,35 +296,39 @@ // CHECK1-LABEL: define {{[^@]+}}@_Z15parallel_masterv // CHECK1-SAME: () #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: // CHECK1-NEXT: invoke void @_Z3foov() // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: -// CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP4:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP5:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP5]]) #[[ATTR6:[0-9]+]] +// CHECK1-NEXT: [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP6]]) #[[ATTR6:[0-9]+]] // CHECK1-NEXT: unreachable // // @@ -339,28 +343,32 @@ // CHECK5-SAME: () #[[ATTR0:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK5-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK5-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK5-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK5: omp_if.then: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4 -// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[A]], align 4 +// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1 // CHECK5-NEXT: store i32 [[INC]], i32* [[A]], align 4 -// CHECK5-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: br label [[OMP_IF_END]] // CHECK5: omp_if.end: // CHECK5-NEXT: ret void @@ -370,30 +378,35 @@ // CHECK9-SAME: () #[[ATTR0:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[A]]) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[A]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK9-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK9-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK9-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: store i32 [[INC]], i32* [[TMP2]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: ret void @@ -403,35 +416,38 @@ // CHECK13-SAME: () #[[ATTR0:[0-9]+]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK13-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 +// CHECK13-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK13-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK13-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK13-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK13-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK13-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK13-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK13: omp_if.then: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK13-NEXT: store i32 [[INC]], i32* [[CONV]], align 4 -// CHECK13-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[A]], align 4 +// CHECK13-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK13-NEXT: store i32 [[INC]], i32* [[A]], align 4 +// CHECK13-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK13-NEXT: br label [[OMP_IF_END]] // CHECK13: omp_if.end: // CHECK13-NEXT: ret void @@ -441,18 +457,17 @@ // CHECK17-SAME: () #[[ATTR0:[0-9]+]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK17-NEXT: [[Y_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[Y_CASTED1:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[A]]) -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* @_ZZ36parallel_master_default_firstprivatevE1y, align 4 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[Y_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[Y_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* @_ZN2St1yE, align 4 -// CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[Y_CASTED1]] to i32* -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[Y_CASTED1]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.St*, i64, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.St* [[A]], i64 [[TMP1]], i64 [[TMP3]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store %struct.St* [[A]], %struct.St** [[TMP0]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* @_ZZ36parallel_master_default_firstprivatevE1y, align 4 +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* @_ZN2St1yE, align 4 +// CHECK17-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[A]]) #[[ATTR4:[0-9]+]] // CHECK17-NEXT: ret void // @@ -468,49 +483,53 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.St* noundef nonnull align 4 dereferenceable(8) [[A:%.*]], i64 noundef [[Y:%.*]], i64 noundef [[Y1:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca %struct.St*, align 8 -// CHECK17-NEXT: [[Y_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[Y_ADDR2:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A4:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK17-NEXT: [[Y:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[Y1:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[A:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store %struct.St* [[A]], %struct.St** [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[Y]], i64* [[Y_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[Y1]], i64* [[Y_ADDR2]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.St*, %struct.St** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[Y_ADDR]] to i32* -// CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[Y_ADDR2]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = bitcast %struct.St* [[A4]] to i8* -// CHECK17-NEXT: [[TMP2:%.*]] = bitcast %struct.St* [[TMP0]] to i8* -// CHECK17-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 8, i1 false) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK17-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK17-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK17-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load %struct.St*, %struct.St** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK17-NEXT: store i32 [[TMP4]], i32* [[Y]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK17-NEXT: store i32 [[TMP6]], i32* [[Y1]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = bitcast %struct.St* [[A]] to i8* +// CHECK17-NEXT: [[TMP8:%.*]] = bitcast %struct.St* [[TMP2]] to i8* +// CHECK17-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP7]], i8* align 4 [[TMP8]], i64 8, i1 false) +// CHECK17-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK17-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK17-NEXT: br i1 [[TMP12]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK17: omp_if.then: -// CHECK17-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_ST]], %struct.St* [[A4]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK17-NEXT: store i32 [[ADD]], i32* [[A5]], align 4 -// CHECK17-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], %struct.St* [[A4]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[B]], align 4 -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK17-NEXT: store i32 [[ADD6]], i32* [[B]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK17-NEXT: store i32 [[INC]], i32* [[CONV]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* @_ZN2St1yE, align 4 -// CHECK17-NEXT: [[INC7:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK17-NEXT: store i32 [[INC7]], i32* @_ZN2St1yE, align 4 -// CHECK17-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_ST]], %struct.St* [[A]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[A2]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[A2]], align 4 +// CHECK17-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], %struct.St* [[A]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[B]], align 4 +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK17-NEXT: store i32 [[ADD3]], i32* [[B]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[Y]], align 4 +// CHECK17-NEXT: [[INC:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: store i32 [[INC]], i32* [[Y]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* @_ZN2St1yE, align 4 +// CHECK17-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK17-NEXT: store i32 [[INC4]], i32* @_ZN2St1yE, align 4 +// CHECK17-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK17-NEXT: br label [[OMP_IF_END]] // CHECK17: omp_if.end: -// CHECK17-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[A4]]) #[[ATTR4]] +// CHECK17-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[A]]) #[[ATTR4]] // CHECK17-NEXT: ret void // // @@ -550,35 +569,38 @@ // CHECK21-SAME: () #[[ATTR0:[0-9]+]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[A:%.*]] = alloca i32, align 4 -// CHECK21-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 -// CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK21-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4 -// CHECK21-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK21-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 +// CHECK21-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK21-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK21-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK21-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK21-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK21-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK21-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK21-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK21-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK21-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK21-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK21-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK21: omp_if.then: -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK21-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK21-NEXT: store i32 [[INC]], i32* [[CONV]], align 4 -// CHECK21-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[A]], align 4 +// CHECK21-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK21-NEXT: store i32 [[INC]], i32* [[A]], align 4 +// CHECK21-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK21-NEXT: br label [[OMP_IF_END]] // CHECK21: omp_if.end: // CHECK21-NEXT: ret void @@ -587,40 +609,44 @@ // CHECK25-LABEL: define {{[^@]+}}@_Z22parallel_master_copyinv // CHECK25-SAME: () #[[ATTR0:[0-9]+]] { // CHECK25-NEXT: entry: -// CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: ret void // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK25-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK25-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK25-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK25-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* bitcast (i32* @a to i8*), i64 4, i8*** @a.cache.) -// CHECK25-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32* -// CHECK25-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[TMP3]] to i64 -// CHECK25-NEXT: [[TMP5:%.*]] = icmp ne i64 ptrtoint (i32* @a to i64), [[TMP4]] -// CHECK25-NEXT: br i1 [[TMP5]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK25-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK25-NEXT: [[TMP3:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i32* @a to i8*), i64 4, i8*** @a.cache.) +// CHECK25-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i32* +// CHECK25-NEXT: [[TMP5:%.*]] = ptrtoint i32* [[TMP4]] to i64 +// CHECK25-NEXT: [[TMP6:%.*]] = icmp ne i64 ptrtoint (i32* @a to i64), [[TMP5]] +// CHECK25-NEXT: br i1 [[TMP6]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK25: copyin.not.master: -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, i32* @a, align 4 -// CHECK25-NEXT: store i32 [[TMP6]], i32* [[TMP3]], align 4 +// CHECK25-NEXT: [[TMP7:%.*]] = load i32, i32* @a, align 4 +// CHECK25-NEXT: store i32 [[TMP7]], i32* [[TMP4]], align 4 // CHECK25-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK25: copyin.not.master.end: -// CHECK25-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]]) -// CHECK25-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK25-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 -// CHECK25-NEXT: br i1 [[TMP8]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK25-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK25-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK25-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK25-NEXT: br i1 [[TMP9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK25: omp_if.then: -// CHECK25-NEXT: [[TMP9:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* bitcast (i32* @a to i8*), i64 4, i8*** @a.cache.) -// CHECK25-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* -// CHECK25-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK25-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK25-NEXT: store i32 [[INC]], i32* [[TMP10]], align 4 -// CHECK25-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK25-NEXT: [[TMP10:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i32* @a to i8*), i64 4, i8*** @a.cache.) +// CHECK25-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32* +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK25-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK25-NEXT: store i32 [[INC]], i32* [[TMP11]], align 4 +// CHECK25-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK25-NEXT: br label [[OMP_IF_END]] // CHECK25: omp_if.end: // CHECK25-NEXT: ret void @@ -629,41 +655,46 @@ // CHECK29-LABEL: define {{[^@]+}}@_Z22parallel_master_copyinv // CHECK29-SAME: () #[[ATTR0:[0-9]+]] { // CHECK29-NEXT: entry: -// CHECK29-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* @a) +// CHECK29-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK29-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK29-NEXT: store i32* @a, i32** [[TMP0]], align 8 +// CHECK29-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK29-NEXT: ret void // // // CHECK29-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK29-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK29-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK29-NEXT: entry: // CHECK29-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK29-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK29-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK29-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK29-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK29-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK29-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK29-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK29-NEXT: [[TMP1:%.*]] = ptrtoint i32* [[TMP0]] to i64 -// CHECK29-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], ptrtoint (i32* @a to i64) -// CHECK29-NEXT: br i1 [[TMP2]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK29-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK29-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK29-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK29-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK29-NEXT: [[TMP3:%.*]] = ptrtoint i32* [[TMP2]] to i64 +// CHECK29-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP3]], ptrtoint (i32* @a to i64) +// CHECK29-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK29: copyin.not.master: -// CHECK29-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK29-NEXT: store i32 [[TMP3]], i32* @a, align 4 +// CHECK29-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK29-NEXT: store i32 [[TMP5]], i32* @a, align 4 // CHECK29-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK29: copyin.not.master.end: -// CHECK29-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK29-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK29-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP5]]) // CHECK29-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK29-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK29-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]]) -// CHECK29-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK29-NEXT: br i1 [[TMP9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK29-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]]) +// CHECK29-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK29-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK29-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]]) +// CHECK29-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK29-NEXT: br i1 [[TMP11]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK29: omp_if.then: -// CHECK29-NEXT: [[TMP10:%.*]] = load i32, i32* @a, align 4 -// CHECK29-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK29-NEXT: [[TMP12:%.*]] = load i32, i32* @a, align 4 +// CHECK29-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK29-NEXT: store i32 [[INC]], i32* @a, align 4 -// CHECK29-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]]) +// CHECK29-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]]) // CHECK29-NEXT: br label [[OMP_IF_END]] // CHECK29: omp_if.end: // CHECK29-NEXT: ret void diff --git a/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp --- a/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp @@ -40,246 +40,252 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8**)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i8** [[TMP0]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[ARGC_ADDR]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: store i8** [[TMP2]], i8*** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i8** noundef [[ARGV:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x i8*], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP24:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP23:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP1]], i64 0 -// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8*, i8** [[TMP5]], i64 9 -// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint i8* [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint i8* [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = add nuw i64 [[TMP10]], 1 -// CHECK1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = call i8* @llvm.stacksave() -// CHECK1-NEXT: store i8* [[TMP13]], i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP11]], align 16 -// CHECK1-NEXT: store i64 [[TMP11]], i64* [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[VLA]], [[TMP14]] +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: store i32 0, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP4]], i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8*, i8** [[TMP8]], i64 9 +// CHECK1-NEXT: [[TMP9:%.*]] = load i8*, i8** [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP9]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint i8* [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint i8* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP14:%.*]] = add nuw i64 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP16:%.*]] = call i8* @llvm.stacksave() +// CHECK1-NEXT: store i8* [[TMP16]], i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP14]], align 16 +// CHECK1-NEXT: store i64 [[TMP14]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[VLA]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP15:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint i8* [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint i8* [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP20]] -// CHECK1-NEXT: store i8** [[_TMP5]], i8*** [[TMP]], align 8 -// CHECK1-NEXT: store i8* [[TMP21]], i8** [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint i8* [[TMP19]] to i64 +// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint i8* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP22:%.*]] = sub i64 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP23:%.*]] = sdiv exact i64 [[TMP22]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP23]] +// CHECK1-NEXT: store i8** [[_TMP4]], i8*** [[TMP]], align 8 +// CHECK1-NEXT: store i8* [[TMP24]], i8** [[_TMP4]], align 8 // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i32* [[ARGC1]] to i8* -// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP25:%.*]] = bitcast i32* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, i64* [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init. to i8*), i8** [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store i8* null, i8** [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb. to i8*), i8** [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: [[TMP31:%.*]] = bitcast i32* [[TMP30]] to i8* -// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP31]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP33:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8*, i8** [[TMP33]], i64 0 -// CHECK1-NEXT: [[TMP34:%.*]] = load i8*, i8** [[ARRAYIDX7]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, i8* [[TMP34]], i64 0 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = sext i32 [[TMP35]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP36]] -// CHECK1-NEXT: [[TMP37:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i8*, i8** [[TMP37]], i64 9 -// CHECK1-NEXT: [[TMP38:%.*]] = load i8*, i8** [[ARRAYIDX10]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, i8* [[TMP38]], i64 [[LB_ADD_LEN9]] -// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 -// CHECK1-NEXT: store i8* [[ARRAYIDX8]], i8** [[TMP39]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint i8* [[ARRAYIDX11]] to i64 -// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint i8* [[ARRAYIDX8]] to i64 -// CHECK1-NEXT: [[TMP42:%.*]] = sub i64 [[TMP40]], [[TMP41]] -// CHECK1-NEXT: [[TMP43:%.*]] = sdiv exact i64 [[TMP42]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP44:%.*]] = add nuw i64 [[TMP43]], 1 -// CHECK1-NEXT: [[TMP45:%.*]] = mul nuw i64 [[TMP44]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP45]], i64* [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init..1 to i8*), i8** [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 -// CHECK1-NEXT: store i8* null, i8** [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb..2 to i8*), i8** [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 -// CHECK1-NEXT: store i32 1, i32* [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[TMP51]], align 4 -// CHECK1-NEXT: [[TMP53:%.*]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]] to i8* -// CHECK1-NEXT: [[TMP54:%.*]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @[[GLOB1]], i32 [[TMP52]], i32 0, i32 2, i8* [[TMP53]]) -// CHECK1-NEXT: store i8* [[TMP54]], i8** [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: [[TMP55:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4 -// CHECK1-NEXT: [[TMP57:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP56]]) -// CHECK1-NEXT: [[TMP58:%.*]] = icmp ne i32 [[TMP57]], 0 -// CHECK1-NEXT: br i1 [[TMP58]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i32* [[ARGC]] to i8* +// CHECK1-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP28:%.*]] = bitcast i32* [[TMP2]] to i8* +// CHECK1-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, i64* [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init. to i8*), i8** [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store i8* null, i8** [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb. to i8*), i8** [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to i8* +// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP34]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP36:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP36]], i64 0 +// CHECK1-NEXT: [[TMP37:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP37]], i64 0 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = sext i32 [[TMP38]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN8:%.*]] = add nsw i64 -1, [[TMP39]] +// CHECK1-NEXT: [[TMP40:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8*, i8** [[TMP40]], i64 9 +// CHECK1-NEXT: [[TMP41:%.*]] = load i8*, i8** [[ARRAYIDX9]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i8, i8* [[TMP41]], i64 [[LB_ADD_LEN8]] +// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 1 +// CHECK1-NEXT: store i8* [[ARRAYIDX7]], i8** [[TMP42]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = ptrtoint i8* [[ARRAYIDX10]] to i64 +// CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint i8* [[ARRAYIDX7]] to i64 +// CHECK1-NEXT: [[TMP45:%.*]] = sub i64 [[TMP43]], [[TMP44]] +// CHECK1-NEXT: [[TMP46:%.*]] = sdiv exact i64 [[TMP45]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP47:%.*]] = add nuw i64 [[TMP46]], 1 +// CHECK1-NEXT: [[TMP48:%.*]] = mul nuw i64 [[TMP47]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP48]], i64* [[TMP49]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 3 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init..1 to i8*), i8** [[TMP50]], align 8 +// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 4 +// CHECK1-NEXT: store i8* null, i8** [[TMP51]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 5 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb..2 to i8*), i8** [[TMP52]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 6 +// CHECK1-NEXT: store i32 1, i32* [[TMP53]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, i32* [[TMP54]], align 4 +// CHECK1-NEXT: [[TMP56:%.*]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]] to i8* +// CHECK1-NEXT: [[TMP57:%.*]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @[[GLOB1]], i32 [[TMP55]], i32 0, i32 2, i8* [[TMP56]]) +// CHECK1-NEXT: store i8* [[TMP57]], i8** [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = load i32, i32* [[TMP58]], align 4 +// CHECK1-NEXT: [[TMP60:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP59]]) +// CHECK1-NEXT: [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0 +// CHECK1-NEXT: br i1 [[TMP61]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store i8** [[DOTTASK_RED_]], i8*** [[TMP59]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[ARGC1]], i32** [[TMP60]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP62:%.*]] = load i8**, i8*** [[TMP]], align 8 -// CHECK1-NEXT: store i8** [[TMP62]], i8*** [[TMP61]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = load i32, i32* [[TMP63]], align 4 -// CHECK1-NEXT: [[TMP65:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP64]], i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP66:%.*]] = bitcast i8* [[TMP65]] to %struct.kmp_task_t_with_privates* -// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP66]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP67]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP69:%.*]] = load i8*, i8** [[TMP68]], align 8 -// CHECK1-NEXT: [[TMP70:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP69]], i8* align 8 [[TMP70]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP66]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP71]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP73:%.*]] = load i8*, i8** [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: store i8* [[TMP73]], i8** [[TMP72]], align 8 -// CHECK1-NEXT: [[TMP74:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP75:%.*]] = load i32, i32* [[TMP74]], align 4 -// CHECK1-NEXT: [[TMP76:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP75]], i8* [[TMP65]]) -// CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP56]]) -// CHECK1-NEXT: br label [[OMP_IF_END]] -// CHECK1: omp_if.end: +// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store i8** [[DOTTASK_RED_]], i8*** [[TMP62]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[ARGC]], i32** [[TMP63]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP65:%.*]] = load i8**, i8*** [[TMP]], align 8 +// CHECK1-NEXT: store i8** [[TMP65]], i8*** [[TMP64]], align 8 +// CHECK1-NEXT: [[TMP66:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP67:%.*]] = load i32, i32* [[TMP66]], align 4 +// CHECK1-NEXT: [[TMP68:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP67]], i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP69:%.*]] = bitcast i8* [[TMP68]] to %struct.kmp_task_t_with_privates* +// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP69]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP70]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP72:%.*]] = load i8*, i8** [[TMP71]], align 8 +// CHECK1-NEXT: [[TMP73:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP72]], i8* align 8 [[TMP73]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP69]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP74]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP76:%.*]] = load i8*, i8** [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: store i8* [[TMP76]], i8** [[TMP75]], align 8 // CHECK1-NEXT: [[TMP77:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP78:%.*]] = load i32, i32* [[TMP77]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP78]], i32 0) -// CHECK1-NEXT: [[TMP79:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP80:%.*]] = bitcast i32* [[ARGC1]] to i8* -// CHECK1-NEXT: store i8* [[TMP80]], i8** [[TMP79]], align 8 -// CHECK1-NEXT: [[TMP81:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP81]], align 8 -// CHECK1-NEXT: [[TMP82:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP83:%.*]] = inttoptr i64 [[TMP11]] to i8* +// CHECK1-NEXT: [[TMP79:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP78]], i8* [[TMP68]]) +// CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP59]]) +// CHECK1-NEXT: br label [[OMP_IF_END]] +// CHECK1: omp_if.end: +// CHECK1-NEXT: [[TMP80:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP81:%.*]] = load i32, i32* [[TMP80]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP81]], i32 0) +// CHECK1-NEXT: [[TMP82:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP83:%.*]] = bitcast i32* [[ARGC]] to i8* // CHECK1-NEXT: store i8* [[TMP83]], i8** [[TMP82]], align 8 -// CHECK1-NEXT: [[TMP84:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP85:%.*]] = load i32, i32* [[TMP84]], align 4 -// CHECK1-NEXT: [[TMP86:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP87:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP85]], i32 2, i64 24, i8* [[TMP86]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP87]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP84:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP84]], align 8 +// CHECK1-NEXT: [[TMP85:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP86:%.*]] = inttoptr i64 [[TMP14]] to i8* +// CHECK1-NEXT: store i8* [[TMP86]], i8** [[TMP85]], align 8 +// CHECK1-NEXT: [[TMP87:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP88:%.*]] = load i32, i32* [[TMP87]], align 4 +// CHECK1-NEXT: [[TMP89:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP90:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP88]], i32 2, i64 24, i8* [[TMP89]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP90]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP88:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP89:%.*]] = load i32, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP88]], [[TMP89]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP90:%.*]] = getelementptr i8, i8* [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[ARRAYIDX2]], [[TMP90]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP91:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP92:%.*]] = load i32, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP91]], [[TMP92]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP93:%.*]] = getelementptr i8, i8* [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[ARRAYIDX1]], [[TMP93]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST12:%.*]] = phi i8* [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP91:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP91]] to i32 -// CHECK1-NEXT: [[TMP92:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP92]] to i32 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV]], [[CONV13]] -// CHECK1-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK1-NEXT: store i8 [[CONV15]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST12]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST11:%.*]] = phi i8* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP94:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP94]] to i32 +// CHECK1-NEXT: [[TMP95:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV12:%.*]] = sext i8 [[TMP95]] to i32 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV]], [[CONV12]] +// CHECK1-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i8 +// CHECK1-NEXT: store i8 [[CONV14]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP90]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done18: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP85]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP93]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done17: +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP88]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP93:%.*]] = load i32, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP94:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP93]] monotonic, align 4 -// CHECK1-NEXT: [[TMP95:%.*]] = getelementptr i8, i8* [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq i8* [[ARRAYIDX2]], [[TMP95]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY19]], label [[OMP_ARRAYCPY_DONE32:%.*]], label [[OMP_ARRAYCPY_BODY20:%.*]] -// CHECK1: omp.arraycpy.body20: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST21:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT30:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST22:%.*]] = phi i8* [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT29:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP96:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1 -// CHECK1-NEXT: [[CONV23:%.*]] = sext i8 [[TMP96]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST22]] monotonic, align 1 +// CHECK1-NEXT: [[TMP96:%.*]] = load i32, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP97:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP96]] monotonic, align 4 +// CHECK1-NEXT: [[TMP98:%.*]] = getelementptr i8, i8* [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY18:%.*]] = icmp eq i8* [[ARRAYIDX1]], [[TMP98]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY18]], label [[OMP_ARRAYCPY_DONE31:%.*]], label [[OMP_ARRAYCPY_BODY19:%.*]] +// CHECK1: omp.arraycpy.body19: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST20:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT29:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST21:%.*]] = phi i8* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT28:%.*]], [[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[TMP99:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 1 +// CHECK1-NEXT: [[CONV22:%.*]] = sext i8 [[TMP99]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST21]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP97:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY20]] ], [ [[TMP102:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP97]], i8* [[_TMP24]], align 1 -// CHECK1-NEXT: [[TMP98:%.*]] = load i8, i8* [[_TMP24]], align 1 -// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP98]] to i32 -// CHECK1-NEXT: [[TMP99:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1 -// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP99]] to i32 -// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[CONV25]], [[CONV26]] -// CHECK1-NEXT: [[CONV28:%.*]] = trunc i32 [[ADD27]] to i8 -// CHECK1-NEXT: store i8 [[CONV28]], i8* [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP100:%.*]] = load i8, i8* [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP101:%.*]] = cmpxchg i8* [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i8 [[TMP97]], i8 [[TMP100]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP102]] = extractvalue { i8, i1 } [[TMP101]], 0 -// CHECK1-NEXT: [[TMP103:%.*]] = extractvalue { i8, i1 } [[TMP101]], 1 -// CHECK1-NEXT: br i1 [[TMP103]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP100:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY19]] ], [ [[TMP105:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP100]], i8* [[_TMP23]], align 1 +// CHECK1-NEXT: [[TMP101:%.*]] = load i8, i8* [[_TMP23]], align 1 +// CHECK1-NEXT: [[CONV24:%.*]] = sext i8 [[TMP101]] to i32 +// CHECK1-NEXT: [[TMP102:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 1 +// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP102]] to i32 +// CHECK1-NEXT: [[ADD26:%.*]] = add nsw i32 [[CONV24]], [[CONV25]] +// CHECK1-NEXT: [[CONV27:%.*]] = trunc i32 [[ADD26]] to i8 +// CHECK1-NEXT: store i8 [[CONV27]], i8* [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP103:%.*]] = load i8, i8* [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP104:%.*]] = cmpxchg i8* [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i8 [[TMP100]], i8 [[TMP103]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP105]] = extractvalue { i8, i1 } [[TMP104]], 0 +// CHECK1-NEXT: [[TMP106:%.*]] = extractvalue { i8, i1 } [[TMP104]], 1 +// CHECK1-NEXT: br i1 [[TMP106]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT29]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT30]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE31:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT29]], [[TMP95]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_DONE32]], label [[OMP_ARRAYCPY_BODY20]] -// CHECK1: omp.arraycpy.done32: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT28]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT29]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST20]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE30:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT28]], [[TMP98]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE30]], label [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_BODY19]] +// CHECK1: omp.arraycpy.done31: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP104:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP104]]) +// CHECK1-NEXT: [[TMP107:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP107]]) // CHECK1-NEXT: ret void // // @@ -389,7 +395,7 @@ // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[TMP_I:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[TMP4_I:%.*]] = alloca i8*, align 8 @@ -403,7 +409,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -416,29 +422,29 @@ // CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* // CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5:[0-9]+]] // CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* // CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP24:%.*]] = load i8**, i8*** [[TMP23]], align 8 // CHECK1-NEXT: [[TMP25:%.*]] = load i8*, i8** [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP26]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 // CHECK1-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP29]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP31:%.*]] = load i8**, i8*** [[TMP30]], align 8 // CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds i8*, i8** [[TMP31]], i64 9 // CHECK1-NEXT: [[TMP32:%.*]] = load i8*, i8** [[ARRAYIDX2_I]], align 8 @@ -449,10 +455,10 @@ // CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 // CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8 +// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8, !noalias !12 // CHECK1-NEXT: [[TMP39:%.*]] = load i8*, i8** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP40:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP39]], i8* [[TMP25]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP42:%.*]] = load i8**, i8*** [[TMP41]], align 8 // CHECK1-NEXT: [[TMP43:%.*]] = load i8*, i8** [[TMP42]], align 8 // CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint i8* [[TMP43]] to i64 diff --git a/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp --- a/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp @@ -57,108 +57,112 @@ // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED7:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED10:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_5:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_9:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED2]] to i32* -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED2]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_2]]) // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_4]], align 1 +// CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_3]], align 1 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_4]], align 1 -// CHECK1-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP9]] to i1 -// CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED7]] to i8* -// CHECK1-NEXT: [[FROMBOOL9:%.*]] = zext i1 [[TOBOOL6]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL9]], i8* [[CONV8]], align 1 -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED7]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK1-NEXT: [[CONV11:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED10]] to i32* -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[CONV11]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED10]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_4]], align 1 -// CHECK1-NEXT: [[TOBOOL12:%.*]] = trunc i8 [[TMP13]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL12]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[ARGC_ADDR]], i32** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 1 +// CHECK1-NEXT: store i8*** [[ARGV_ADDR]], i8**** [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP12:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_3]], align 1 +// CHECK1-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP12]] to i1 +// CHECK1-NEXT: [[FROMBOOL7:%.*]] = zext i1 [[TOBOOL6]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL7]], i8* [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[TMP13]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_3]], align 1 +// CHECK1-NEXT: [[TOBOOL8:%.*]] = trunc i8 [[TMP15]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL8]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*)) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP14]] +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_9]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP16]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 33, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates* -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP6]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 4 -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %union.kmp_cmplrdata_t* [[TMP8]] to i32* -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, i64* [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 6 -// CHECK1-NEXT: store i64 9, i64* [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, i64* [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 9 -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i8* -// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP14]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP12]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP5]], i32 1, i64* [[TMP10]], i64* [[TMP11]], i64 [[TMP15]], i32 1, i32 0, i64 0, i8* null) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 33, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to %struct.kmp_task_t_with_privates* +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP9]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast %union.kmp_cmplrdata_t* [[TMP11]] to i32* +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, i64* [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 6 +// CHECK1-NEXT: store i64 9, i64* [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, i64* [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 9 +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i8* +// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP17]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP15]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i8* [[TMP8]], i32 1, i64* [[TMP13]], i64* [[TMP14]], i64 [[TMP18]], i32 1, i32 0, i64 0, i8* null) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -177,7 +181,7 @@ // CHECK1-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[I_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 @@ -190,7 +194,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 // CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP10]], align 8 @@ -217,8 +221,8 @@ // CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // CHECK1-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP21]] to i32 // CHECK1-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 @@ -241,47 +245,51 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.1*)* @.omp_task_entry..4 to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates.1* -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], %struct.kmp_task_t_with_privates.1* [[TMP6]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, i64* [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 6 -// CHECK1-NEXT: store i64 9, i64* [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, i64* [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 9 -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i8* -// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP12]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = zext i32 [[TMP4]] to i64 -// CHECK1-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP5]], i32 1, i64* [[TMP8]], i64* [[TMP9]], i64 [[TMP13]], i32 1, i32 1, i64 [[TMP14]], i8* null) -// CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 1, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.3*)* @.omp_task_entry..4 to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to %struct.kmp_task_t_with_privates.3* +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], %struct.kmp_task_t_with_privates.3* [[TMP9]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, i64* [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 6 +// CHECK1-NEXT: store i64 9, i64* [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, i64* [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 9 +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i8* +// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP15]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK1-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i8* [[TMP8]], i32 1, i64* [[TMP11]], i64* [[TMP12]], i64 [[TMP16]], i32 1, i32 1, i64 [[TMP17]], i8* null) +// CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..4 -// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.1* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.3* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 @@ -293,21 +301,21 @@ // CHECK1-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[I_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.1*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.3*, align 8 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.1* [[TMP1]], %struct.kmp_task_t_with_privates.1** [[DOTADDR1]], align 8 +// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.3* [[TMP1]], %struct.kmp_task_t_with_privates.3** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.1*, %struct.kmp_task_t_with_privates.1** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], %struct.kmp_task_t_with_privates.1* [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.3*, %struct.kmp_task_t_with_privates.3** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], %struct.kmp_task_t_with_privates.3* [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.1* [[TMP3]] to i8* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.2* +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.3* [[TMP3]] to i8* // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 // CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP10]], align 8 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6 @@ -333,8 +341,8 @@ // CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !28 // CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !28 // CHECK1-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !28 -// CHECK1-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !28 -// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !28 +// CHECK1-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !28 +// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !28 // CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !28 // CHECK1-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP21]] to i32 // CHECK1-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !28 @@ -357,104 +365,109 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i8*** noundef nonnull align 8 dereferenceable(8) [[ARGV:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca i8***, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i8*** [[ARGV]], i8**** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], i64* [[DOTCAPTURE_EXPR__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8***, i8**** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK1-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8***, i8**** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 8 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK1-NEXT: br i1 [[TMP12]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store i32* [[TMP0]], i32** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i8*** [[TMP1]], i8**** [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV3]], align 4 -// CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8**, i8*** [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, i8* [[TMP13]], i64 [[IDXPROM8]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i8, i8* [[ARRAYIDX9]], align 1 -// CHECK1-NEXT: [[CONV10:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK1-NEXT: store i32 [[CONV10]], i32* [[DOTCAPTURE_EXPR_7]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[TMP2]], i32** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store i8*** [[TMP4]], i8**** [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i8**, i8*** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP18]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP20]], i64 [[IDXPROM6]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP23]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[CONV12:%.*]] = sext i32 [[DIV]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_7]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK1-NEXT: [[SUB13:%.*]] = sub i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: [[SUB14:%.*]] = sub i32 [[SUB13]], 1 -// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB14]], 1 -// CHECK1-NEXT: [[DIV15:%.*]] = udiv i32 [[ADD]], 1 -// CHECK1-NEXT: [[CONV16:%.*]] = zext i32 [[DIV15]] to i64 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV12]], [[CONV16]] -// CHECK1-NEXT: [[SUB17:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK1-NEXT: store i64 [[SUB17]], i64* [[DOTCAPTURE_EXPR_11]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i64 80, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.3*)* @.omp_task_entry..7 to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to %struct.kmp_task_t_with_privates.3* -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], %struct.kmp_task_t_with_privates.3* [[TMP20]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP21]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP23:%.*]] = load i8*, i8** [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = bitcast %struct.anon.2* [[AGG_CAPTURED]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP23]], i8* align 8 [[TMP24]], i64 16, i1 false) -// CHECK1-NEXT: [[TMP25:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP25]] to i1 -// CHECK1-NEXT: [[TMP26:%.*]] = sext i1 [[TOBOOL]] to i32 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP21]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, i64* [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP21]], i32 0, i32 6 -// CHECK1-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_11]], align 8 -// CHECK1-NEXT: store i64 [[TMP29]], i64* [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP21]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, i64* [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP21]], i32 0, i32 9 -// CHECK1-NEXT: [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i8* -// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP32]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP33:%.*]] = load i64, i64* [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i8* [[TMP19]], i32 [[TMP26]], i64* [[TMP27]], i64* [[TMP28]], i64 [[TMP33]], i32 1, i32 2, i64 [[TMP34]], i8* null) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: [[CONV9:%.*]] = sext i32 [[DIV]] to i64 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB11]], 1 +// CHECK1-NEXT: [[DIV12:%.*]] = udiv i32 [[ADD]], 1 +// CHECK1-NEXT: [[CONV13:%.*]] = zext i32 [[DIV12]] to i64 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV9]], [[CONV13]] +// CHECK1-NEXT: [[SUB14:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK1-NEXT: store i64 [[SUB14]], i64* [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1, i64 80, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.6*)* @.omp_task_entry..7 to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP27:%.*]] = bitcast i8* [[TMP26]] to %struct.kmp_task_t_with_privates.6* +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], %struct.kmp_task_t_with_privates.6* [[TMP27]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP28]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP30:%.*]] = load i8*, i8** [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = bitcast %struct.anon.5* [[AGG_CAPTURED]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP30]], i8* align 8 [[TMP31]], i64 16, i1 false) +// CHECK1-NEXT: [[TMP32:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL15:%.*]] = trunc i8 [[TMP32]] to i1 +// CHECK1-NEXT: [[TMP33:%.*]] = sext i1 [[TOBOOL15]] to i32 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP28]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, i64* [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP28]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: store i64 [[TMP36]], i64* [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP28]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, i64* [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP28]], i32 0, i32 9 +// CHECK1-NEXT: [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i8* +// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP39]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP40:%.*]] = load i64, i64* [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = zext i32 [[TMP15]] to i64 +// CHECK1-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i8* [[TMP26]], i32 [[TMP33]], i64* [[TMP34]], i64* [[TMP35]], i64 [[TMP40]], i32 1, i32 2, i64 [[TMP41]], i8* null) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..7 -// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.3* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.6* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 @@ -466,7 +479,7 @@ // CHECK1-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.5*, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_2_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_3_I:%.*]] = alloca i32, align 4 @@ -477,17 +490,17 @@ // CHECK1-NEXT: [[J15_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.3*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.6*, align 8 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.3* [[TMP1]], %struct.kmp_task_t_with_privates.3** [[DOTADDR1]], align 8 +// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.6* [[TMP1]], %struct.kmp_task_t_with_privates.6** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.3*, %struct.kmp_task_t_with_privates.3** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], %struct.kmp_task_t_with_privates.3* [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.6*, %struct.kmp_task_t_with_privates.6** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], %struct.kmp_task_t_with_privates.6* [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.2* -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.3* [[TMP3]] to i8* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.5* +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.6* [[TMP3]] to i8* // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 // CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP10]], align 8 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6 @@ -513,26 +526,26 @@ // CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !40 // CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !40 // CHECK1-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !40 -// CHECK1-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !40 -// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !40 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP20]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.anon.5* [[TMP8]], %struct.anon.5** [[__CONTEXT_ADDR_I]], align 8, !noalias !40 +// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR_I]], align 8, !noalias !40 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP20]], i32 0, i32 0 // CHECK1-NEXT: [[TMP22:%.*]] = load i32*, i32** [[TMP21]], align 8 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 // CHECK1-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !40 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP20]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP20]], i32 0, i32 0 // CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[TMP24]], align 8 // CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 // CHECK1-NEXT: store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !40 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP20]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP20]], i32 0, i32 1 // CHECK1-NEXT: [[TMP28:%.*]] = load i8***, i8**** [[TMP27]], align 8 // CHECK1-NEXT: [[TMP29:%.*]] = load i8**, i8*** [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP20]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP20]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[TMP30]], align 8 // CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 // CHECK1-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[TMP32]] to i64 // CHECK1-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i8*, i8** [[TMP29]], i64 [[IDXPROM_I]] // CHECK1-NEXT: [[TMP33:%.*]] = load i8*, i8** [[ARRAYIDX_I]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP20]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP20]], i32 0, i32 0 // CHECK1-NEXT: [[TMP35:%.*]] = load i32*, i32** [[TMP34]], align 8 // CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 // CHECK1-NEXT: [[IDXPROM4_I:%.*]] = sext i32 [[TMP36]] to i64 @@ -564,9 +577,9 @@ // CHECK1: taskloop.if.then.i: // CHECK1-NEXT: [[TMP45:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !40 // CHECK1-NEXT: store i64 [[TMP45]], i64* [[DOTOMP_IV_I]], align 8, !noalias !40 -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP20]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP20]], i32 0, i32 0 // CHECK1-NEXT: [[TMP47:%.*]] = load i32*, i32** [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP20]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP20]], i32 0, i32 1 // CHECK1-NEXT: [[TMP49:%.*]] = load i8***, i8**** [[TMP48]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK1: omp.inner.for.cond.i: @@ -615,44 +628,47 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 1 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 1 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.5*)* @.omp_task_entry..10 to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.kmp_task_t_with_privates.5* -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], %struct.kmp_task_t_with_privates.5* [[TMP5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, i64* [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 6 -// CHECK1-NEXT: store i64 9, i64* [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, i64* [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 9 -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast i8** [[TMP10]] to i8* -// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP11]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP9]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP4]], i32 1, i64* [[TMP7]], i64* [[TMP8]], i64 [[TMP12]], i32 1, i32 0, i64 0, i8* null) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.9*)* @.omp_task_entry..10 to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates.9* +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_9:%.*]], %struct.kmp_task_t_with_privates.9* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, i64* [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 6 +// CHECK1-NEXT: store i64 9, i64* [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, i64* [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 9 +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i8* +// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP12]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP10]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* [[TMP5]], i32 1, i64* [[TMP8]], i64* [[TMP9]], i64 [[TMP13]], i32 1, i32 0, i64 0, i8* null) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..10 -// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.5* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.9* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 @@ -664,22 +680,22 @@ // CHECK1-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.8*, align 8 // CHECK1-NEXT: [[I_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[CLEANUP_DEST_SLOT_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.5*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.9*, align 8 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.5* [[TMP1]], %struct.kmp_task_t_with_privates.5** [[DOTADDR1]], align 8 +// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.9* [[TMP1]], %struct.kmp_task_t_with_privates.9** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.5*, %struct.kmp_task_t_with_privates.5** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], %struct.kmp_task_t_with_privates.5* [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.9*, %struct.kmp_task_t_with_privates.9** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_9:%.*]], %struct.kmp_task_t_with_privates.9* [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.4* -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.5* [[TMP3]] to i8* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.8* +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.9* [[TMP3]] to i8* // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 // CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP10]], align 8 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6 @@ -705,8 +721,8 @@ // CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !52 // CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !52 // CHECK1-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !52 -// CHECK1-NEXT: store %struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !52 -// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !52 +// CHECK1-NEXT: store %struct.anon.8* [[TMP8]], %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !52 +// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !52 // CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !52 // CHECK1-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP21]] to i32 // CHECK1-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !52 @@ -774,7 +790,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[C]], i32* [[C_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 @@ -782,89 +798,96 @@ // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP1]] to i1 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.S* [[THIS1]], %struct.S** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[C_ADDR]], i32** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK1-NEXT: [[FROMBOOL3:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL3]], i8* [[CONV]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S*, i32*, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.S* [[THIS1]], i32* [[C_ADDR]], i64 [[TMP2]]) +// CHECK1-NEXT: store i8 [[FROMBOOL3]], i8* [[TMP3]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S* noundef [[THIS:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK1-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 8 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK1-NEXT: br i1 [[TMP10]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store %struct.S* [[TMP0]], %struct.S** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[TMP1]], i32** [[TMP7]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 -// CHECK1-NEXT: store i32* [[TMP]], i32** [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[TMP4]], i32** [[TMP12]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK1-NEXT: store i32* [[TMP]], i32** [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP15]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = select i1 [[TOBOOL]], i32 2, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = or i32 [[TMP11]], 1 -// CHECK1-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 [[TMP12]], i64 80, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.7*)* @.omp_task_entry..13 to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to %struct.kmp_task_t_with_privates.7* -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_7:%.*]], %struct.kmp_task_t_with_privates.7* [[TMP14]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP15]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = bitcast %struct.anon.6* [[AGG_CAPTURED]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 16, i1 false) -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP15]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, i64* [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP15]], i32 0, i32 6 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: [[CONV5:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: store i64 [[CONV5]], i64* [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP15]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, i64* [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP15]], i32 0, i32 9 -// CHECK1-NEXT: [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i8* -// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP24]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, i64* [[TMP22]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i8* [[TMP13]], i32 1, i64* [[TMP19]], i64* [[TMP20]], i64 [[TMP25]], i32 1, i32 2, i64 4, i8* null) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: [[SUB5:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB5]], i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = select i1 [[TOBOOL1]], i32 2, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 [[TMP17]], i64 80, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.12*)* @.omp_task_entry..13 to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP18]] to %struct.kmp_task_t_with_privates.12* +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_12:%.*]], %struct.kmp_task_t_with_privates.12* [[TMP19]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast %struct.anon.11* [[AGG_CAPTURED]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP22]], i8* align 8 [[TMP23]], i64 16, i1 false) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, i64* [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK1-NEXT: store i64 [[CONV]], i64* [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, i64* [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 9 +// CHECK1-NEXT: [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i8* +// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP29]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP30:%.*]] = load i64, i64* [[TMP27]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i8* [[TMP18]], i32 1, i64* [[TMP24]], i64* [[TMP25]], i64 [[TMP30]], i32 1, i32 2, i64 4, i8* null) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..13 -// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.7* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.12* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 @@ -876,7 +899,7 @@ // CHECK1-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.6*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.11*, align 8 // CHECK1-NEXT: [[TMP_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP1_I:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__I:%.*]] = alloca i32, align 4 @@ -887,17 +910,17 @@ // CHECK1-NEXT: [[TMP6_I:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.7*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.12*, align 8 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.7* [[TMP1]], %struct.kmp_task_t_with_privates.7** [[DOTADDR1]], align 8 +// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.12* [[TMP1]], %struct.kmp_task_t_with_privates.12** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.7*, %struct.kmp_task_t_with_privates.7** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_7:%.*]], %struct.kmp_task_t_with_privates.7* [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.12*, %struct.kmp_task_t_with_privates.12** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_12:%.*]], %struct.kmp_task_t_with_privates.12* [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.6* -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.7* [[TMP3]] to i8* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.11* +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.12* [[TMP3]] to i8* // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 // CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP10]], align 8 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6 @@ -923,12 +946,12 @@ // CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !64 // CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !64 // CHECK1-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !64 -// CHECK1-NEXT: store %struct.anon.6* [[TMP8]], %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !64 -// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !64 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP20]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.anon.11* [[TMP8]], %struct.anon.11** [[__CONTEXT_ADDR_I]], align 8, !noalias !64 +// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR_I]], align 8, !noalias !64 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], %struct.anon.11* [[TMP20]], i32 0, i32 0 // CHECK1-NEXT: [[TMP22:%.*]] = load %struct.S*, %struct.S** [[TMP21]], align 8 // CHECK1-NEXT: store i32* [[TMP_I]], i32** [[TMP1_I]], align 8, !noalias !64 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP20]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP20]], i32 0, i32 1 // CHECK1-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP23]], align 8 // CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 // CHECK1-NEXT: store i32 [[TMP25]], i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !64 @@ -946,7 +969,7 @@ // CHECK1-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !64 // CHECK1-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP29]] to i32 // CHECK1-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !64 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP20]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP20]], i32 0, i32 1 // CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[TMP30]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK1: omp.inner.for.cond.i: diff --git a/clang/test/OpenMP/parallel_master_taskloop_lastprivate_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_lastprivate_codegen.cpp --- a/clang/test/OpenMP/parallel_master_taskloop_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_lastprivate_codegen.cpp @@ -207,6 +207,7 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 16 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIdEC1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]]) // CHECK1-NEXT: call void @_ZN1SIdEC1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TEST]]) @@ -218,15 +219,23 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYINIT_ELEMENT]], double noundef 2.000000e+00) // CHECK1-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR]], double noundef 3.000000e+00) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32*, [2 x %struct.S]*, %struct.S*)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[VEC]], i32* [[T_VAR]], [2 x %struct.S]* [[S_ARR]], %struct.S* [[VAR]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[TMP4]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK1-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] @@ -234,8 +243,8 @@ // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TEST]]) #[[ATTR4]] // CHECK1-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP2]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP6]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIdEC1Ev @@ -262,53 +271,52 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 8 dereferenceable(16) [[S_ARR:%.*]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 -// CHECK1-NEXT: br i1 [[TMP7]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK1-NEXT: br i1 [[TMP12]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[TMP1]], i32** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: store [2 x %struct.S]* [[TMP2]], [2 x %struct.S]** [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK1-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 4 -// CHECK1-NEXT: store i32* @_ZZ4mainE5sivar, i32** [[TMP12]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 9, i64 120, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to %struct.kmp_task_t_with_privates* -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP14]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP15]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 40, i1 false) -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP14]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP19]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP20]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store [2 x i32]* [[TMP2]], [2 x i32]** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[TMP4]], i32** [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x %struct.S]* [[TMP6]], [2 x %struct.S]** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 3 +// CHECK1-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 4 +// CHECK1-NEXT: store i32* @_ZZ4mainE5sivar, i32** [[TMP17]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP18:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 9, i64 120, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP18]] to %struct.kmp_task_t_with_privates* +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP19]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP22]], i8* align 8 [[TMP23]], i64 40, i1 false) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP19]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP24]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP25]], i32 0, i32 0 // CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: @@ -318,24 +326,24 @@ // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP19]], i32 0, i32 1 -// CHECK1-NEXT: call void @_ZN1SIdEC1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP21]]) -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP15]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast %union.kmp_cmplrdata_t* [[TMP22]] to i32 (i32, i8*)** -// CHECK1-NEXT: store i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_destructor. to i32 (i32, i8*)*), i32 (i32, i8*)** [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP15]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, i64* [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP15]], i32 0, i32 6 -// CHECK1-NEXT: store i64 9, i64* [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP15]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, i64* [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP15]], i32 0, i32 9 -// CHECK1-NEXT: [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i8* -// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP28]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP29:%.*]] = load i64, i64* [[TMP26]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i8* [[TMP13]], i32 1, i64* [[TMP24]], i64* [[TMP25]], i64 [[TMP29]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates*, i32)* @.omp_task_dup. to i8*)) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP24]], i32 0, i32 1 +// CHECK1-NEXT: call void @_ZN1SIdEC1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP26]]) +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP28:%.*]] = bitcast %union.kmp_cmplrdata_t* [[TMP27]] to i32 (i32, i8*)** +// CHECK1-NEXT: store i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_destructor. to i32 (i32, i8*)*), i32 (i32, i8*)** [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, i64* [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 6 +// CHECK1-NEXT: store i64 9, i64* [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, i64* [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 9 +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i8* +// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP33]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP34:%.*]] = load i64, i64* [[TMP31]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i8* [[TMP18]], i32 1, i64* [[TMP29]], i64* [[TMP30]], i64 [[TMP34]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates*, i32)* @.omp_task_dup. to i8*)) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -388,7 +396,7 @@ // CHECK1-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTLASTPRIV_PTR_ADDR_I:%.*]] = alloca %struct.S*, align 8 // CHECK1-NEXT: [[DOTLASTPRIV_PTR_ADDR1_I:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTLASTPRIV_PTR_ADDR2_I:%.*]] = alloca [2 x %struct.S]*, align 8 @@ -406,7 +414,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -435,25 +443,25 @@ // CHECK1-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // CHECK1-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, %struct.S**, i32**, [2 x %struct.S]**, [2 x i32]**, i32**)* // CHECK1-NEXT: call void [[TMP25]](i8* [[TMP24]], %struct.S** [[DOTLASTPRIV_PTR_ADDR_I]], i32** [[DOTLASTPRIV_PTR_ADDR1_I]], [2 x %struct.S]** [[DOTLASTPRIV_PTR_ADDR2_I]], [2 x i32]** [[DOTLASTPRIV_PTR_ADDR3_I]], i32** [[DOTLASTPRIV_PTR_ADDR4_I]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP22]], i32 0, i32 3 // CHECK1-NEXT: [[TMP27:%.*]] = load %struct.S*, %struct.S** [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP22]], i32 0, i32 1 // CHECK1-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP22]], i32 0, i32 2 // CHECK1-NEXT: [[TMP31:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP22]], i32 0, i32 0 // CHECK1-NEXT: [[TMP33:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP22]], i32 0, i32 2 // CHECK1-NEXT: [[TMP35:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP34]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP22]], i32 0, i32 3 // CHECK1-NEXT: [[TMP37:%.*]] = load %struct.S*, %struct.S** [[TMP36]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP22]], i32 0, i32 4 // CHECK1-NEXT: [[TMP39:%.*]] = load i32*, i32** [[TMP38]], align 8 // CHECK1-NEXT: [[TMP40:%.*]] = load %struct.S*, %struct.S** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP41:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 @@ -592,39 +600,48 @@ // CHECK1-SAME: () #[[ATTR9:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TTT]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 128 // CHECK1-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [2 x i32]* [[VEC]], i32* [[T_VAR]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[VAR]]) +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[TMP4]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP2]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TTT]]) #[[ATTR4]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP6]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIdEC2Ev @@ -662,137 +679,136 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 -// CHECK1-NEXT: br i1 [[TMP7]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK1-NEXT: br i1 [[TMP12]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[TMP1]], i32** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: store [2 x %struct.S.0]* [[TMP2]], [2 x %struct.S.0]** [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK1-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP11]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP12:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 9, i64 256, i64 32, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.2*)* @.omp_task_entry..5 to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to %struct.kmp_task_t_with_privates.2* -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP13]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP14]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 128 -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast %struct.anon.1* [[AGG_CAPTURED]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP16]], i8* align 8 [[TMP17]], i64 32, i1 false) -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], %struct.kmp_task_t_with_privates.2* [[TMP13]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP18]], i32 0, i32 2 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP19]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store [2 x i32]* [[TMP2]], [2 x i32]** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[TMP4]], i32** [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x %struct.S.1]* [[TMP6]], [2 x %struct.S.1]** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[AGG_CAPTURED]], i32 0, i32 3 +// CHECK1-NEXT: store %struct.S.1* [[TMP8]], %struct.S.1** [[TMP16]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP17:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 9, i64 256, i64 32, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.4*)* @.omp_task_entry..5 to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to %struct.kmp_task_t_with_privates.4* +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], %struct.kmp_task_t_with_privates.4* [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP19]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 128 +// CHECK1-NEXT: [[TMP22:%.*]] = bitcast %struct.anon.3* [[AGG_CAPTURED]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 32, i1 false) +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4]], %struct.kmp_task_t_with_privates.4* [[TMP18]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5:%.*]], %struct..kmp_privates.t.5* [[TMP23]], i32 0, i32 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP24]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: -// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[OMP_IF_THEN]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[OMP_IF_THEN]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP18]], i32 0, i32 3 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP20]]) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP14]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP22:%.*]] = bitcast %union.kmp_cmplrdata_t* [[TMP21]] to i32 (i32, i8*)** -// CHECK1-NEXT: store i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.2*)* @.omp_task_destructor..7 to i32 (i32, i8*)*), i32 (i32, i8*)** [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP14]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, i64* [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP14]], i32 0, i32 6 -// CHECK1-NEXT: store i64 9, i64* [[TMP24]], align 16 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP14]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, i64* [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP14]], i32 0, i32 9 -// CHECK1-NEXT: [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i8* -// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP27]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP28:%.*]] = load i64, i64* [[TMP25]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i8* [[TMP12]], i32 1, i64* [[TMP23]], i64* [[TMP24]], i64 [[TMP28]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2*, i32)* @.omp_task_dup..6 to i8*)) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], %struct..kmp_privates.t.5* [[TMP23]], i32 0, i32 3 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP25]]) +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP19]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP27:%.*]] = bitcast %union.kmp_cmplrdata_t* [[TMP26]] to i32 (i32, i8*)** +// CHECK1-NEXT: store i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.4*)* @.omp_task_destructor..7 to i32 (i32, i8*)*), i32 (i32, i8*)** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP19]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, i64* [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP19]], i32 0, i32 6 +// CHECK1-NEXT: store i64 9, i64* [[TMP29]], align 16 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP19]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, i64* [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP19]], i32 0, i32 9 +// CHECK1-NEXT: [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i8* +// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP32]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP33:%.*]] = load i64, i64* [[TMP30]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i8* [[TMP17]], i32 1, i64* [[TMP28]], i64* [[TMP29]], i64 [[TMP33]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates.4*, %struct.kmp_task_t_with_privates.4*, i32)* @.omp_task_dup..6 to i8*)) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_privates_map..4 -// CHECK1-SAME: (%struct..kmp_privates.t.3* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]], [2 x i32]** noalias noundef [[TMP2:%.*]], [2 x %struct.S.0]** noalias noundef [[TMP3:%.*]], %struct.S.0** noalias noundef [[TMP4:%.*]]) #[[ATTR6]] { +// CHECK1-SAME: (%struct..kmp_privates.t.5* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]], [2 x i32]** noalias noundef [[TMP2:%.*]], [2 x %struct.S.1]** noalias noundef [[TMP3:%.*]], %struct.S.1** noalias noundef [[TMP4:%.*]]) #[[ATTR6]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.3*, align 8 +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.5*, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32**, align 8 // CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca [2 x i32]**, align 8 -// CHECK1-NEXT: [[DOTADDR3:%.*]] = alloca [2 x %struct.S.0]**, align 8 -// CHECK1-NEXT: [[DOTADDR4:%.*]] = alloca %struct.S.0**, align 8 -// CHECK1-NEXT: store %struct..kmp_privates.t.3* [[TMP0]], %struct..kmp_privates.t.3** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[DOTADDR3:%.*]] = alloca [2 x %struct.S.1]**, align 8 +// CHECK1-NEXT: [[DOTADDR4:%.*]] = alloca %struct.S.1**, align 8 +// CHECK1-NEXT: store %struct..kmp_privates.t.5* [[TMP0]], %struct..kmp_privates.t.5** [[DOTADDR]], align 8 // CHECK1-NEXT: store i32** [[TMP1]], i32*** [[DOTADDR1]], align 8 // CHECK1-NEXT: store [2 x i32]** [[TMP2]], [2 x i32]*** [[DOTADDR2]], align 8 -// CHECK1-NEXT: store [2 x %struct.S.0]** [[TMP3]], [2 x %struct.S.0]*** [[DOTADDR3]], align 8 -// CHECK1-NEXT: store %struct.S.0** [[TMP4]], %struct.S.0*** [[DOTADDR4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load %struct..kmp_privates.t.3*, %struct..kmp_privates.t.3** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP5]], i32 0, i32 0 +// CHECK1-NEXT: store [2 x %struct.S.1]** [[TMP3]], [2 x %struct.S.1]*** [[DOTADDR3]], align 8 +// CHECK1-NEXT: store %struct.S.1** [[TMP4]], %struct.S.1*** [[DOTADDR4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load %struct..kmp_privates.t.5*, %struct..kmp_privates.t.5** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5:%.*]], %struct..kmp_privates.t.5* [[TMP5]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i32**, i32*** [[DOTADDR1]], align 8 // CHECK1-NEXT: store i32* [[TMP6]], i32** [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP5]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], %struct..kmp_privates.t.5* [[TMP5]], i32 0, i32 1 // CHECK1-NEXT: [[TMP9:%.*]] = load [2 x i32]**, [2 x i32]*** [[DOTADDR2]], align 8 // CHECK1-NEXT: store [2 x i32]* [[TMP8]], [2 x i32]** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP5]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP11:%.*]] = load [2 x %struct.S.0]**, [2 x %struct.S.0]*** [[DOTADDR3]], align 8 -// CHECK1-NEXT: store [2 x %struct.S.0]* [[TMP10]], [2 x %struct.S.0]** [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP5]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP13:%.*]] = load %struct.S.0**, %struct.S.0*** [[DOTADDR4]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP12]], %struct.S.0** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], %struct..kmp_privates.t.5* [[TMP5]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP11:%.*]] = load [2 x %struct.S.1]**, [2 x %struct.S.1]*** [[DOTADDR3]], align 8 +// CHECK1-NEXT: store [2 x %struct.S.1]* [[TMP10]], [2 x %struct.S.1]** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], %struct..kmp_privates.t.5* [[TMP5]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP13:%.*]] = load %struct.S.1**, %struct.S.1*** [[DOTADDR4]], align 8 +// CHECK1-NEXT: store %struct.S.1* [[TMP12]], %struct.S.1** [[TMP13]], align 8 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..5 -// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.2* noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.4* noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 @@ -804,27 +820,27 @@ // CHECK1-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTLASTPRIV_PTR_ADDR_I:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTLASTPRIV_PTR_ADDR1_I:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[DOTLASTPRIV_PTR_ADDR2_I:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK1-NEXT: [[DOTLASTPRIV_PTR_ADDR3_I:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[DOTLASTPRIV_PTR_ADDR2_I:%.*]] = alloca [2 x %struct.S.1]*, align 8 +// CHECK1-NEXT: [[DOTLASTPRIV_PTR_ADDR3_I:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[I_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.4*, align 8 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP1]], %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 +// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.4* [[TMP1]], %struct.kmp_task_t_with_privates.4** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.4*, %struct.kmp_task_t_with_privates.4** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], %struct.kmp_task_t_with_privates.4* [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 128 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.1* -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.3* [[TMP9]] to i8* -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.2* [[TMP3]] to i8* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.3* +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4]], %struct.kmp_task_t_with_privates.4* [[TMP3]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.5* [[TMP9]] to i8* +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.4* [[TMP3]] to i8* // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 // CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8 // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6 @@ -843,35 +859,35 @@ // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !28 // CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !28 // CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !28 -// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.3*, i32**, [2 x i32]**, [2 x %struct.S.0]**, %struct.S.0**)* @.omp_task_privates_map..4 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !28 +// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.5*, i32**, [2 x i32]**, [2 x %struct.S.1]**, %struct.S.1**)* @.omp_task_privates_map..4 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !28 // CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !28 // CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !28 // CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !28 // CHECK1-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !28 // CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !28 // CHECK1-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !28 -// CHECK1-NEXT: store %struct.anon.1* [[TMP8]], %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !28 -// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !28 +// CHECK1-NEXT: store %struct.anon.3* [[TMP8]], %struct.anon.3** [[__CONTEXT_ADDR_I]], align 8, !noalias !28 +// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR_I]], align 8, !noalias !28 // CHECK1-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !28 // CHECK1-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !28 -// CHECK1-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, i32**, [2 x i32]**, [2 x %struct.S.0]**, %struct.S.0**)* -// CHECK1-NEXT: call void [[TMP25]](i8* [[TMP24]], i32** [[DOTLASTPRIV_PTR_ADDR_I]], [2 x i32]** [[DOTLASTPRIV_PTR_ADDR1_I]], [2 x %struct.S.0]** [[DOTLASTPRIV_PTR_ADDR2_I]], %struct.S.0** [[DOTLASTPRIV_PTR_ADDR3_I]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP22]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, i32**, [2 x i32]**, [2 x %struct.S.1]**, %struct.S.1**)* +// CHECK1-NEXT: call void [[TMP25]](i8* [[TMP24]], i32** [[DOTLASTPRIV_PTR_ADDR_I]], [2 x i32]** [[DOTLASTPRIV_PTR_ADDR1_I]], [2 x %struct.S.1]** [[DOTLASTPRIV_PTR_ADDR2_I]], %struct.S.1** [[DOTLASTPRIV_PTR_ADDR3_I]]) #[[ATTR4]] +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP22]], i32 0, i32 1 // CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP22]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP22]], i32 0, i32 0 // CHECK1-NEXT: [[TMP29:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP22]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP31:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP22]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP33:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP22]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP35:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP34]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP22]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP37:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP22]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP31:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP22]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP33:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP22]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP35:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP22]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP37:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP36]], align 8 // CHECK1-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !28 // CHECK1-NEXT: [[TMP39:%.*]] = load [2 x i32]*, [2 x i32]** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !28 -// CHECK1-NEXT: [[TMP40:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[DOTLASTPRIV_PTR_ADDR2_I]], align 8, !noalias !28 -// CHECK1-NEXT: [[TMP41:%.*]] = load %struct.S.0*, %struct.S.0** [[DOTLASTPRIV_PTR_ADDR3_I]], align 8, !noalias !28 +// CHECK1-NEXT: [[TMP40:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[DOTLASTPRIV_PTR_ADDR2_I]], align 8, !noalias !28 +// CHECK1-NEXT: [[TMP41:%.*]] = load %struct.S.1*, %struct.S.1** [[DOTLASTPRIV_PTR_ADDR3_I]], align 8, !noalias !28 // CHECK1-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !28 // CHECK1-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP42]] to i32 // CHECK1-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !28 @@ -888,9 +904,9 @@ // CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP38]], align 128 // CHECK1-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP39]], i64 0, i64 0 // CHECK1-NEXT: store i32 [[TMP46]], i32* [[ARRAYIDX_I]], align 4 -// CHECK1-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP40]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP47:%.*]] = bitcast %struct.S.0* [[ARRAYIDX5_I]] to i8* -// CHECK1-NEXT: [[TMP48:%.*]] = bitcast %struct.S.0* [[TMP41]] to i8* +// CHECK1-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP40]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP47:%.*]] = bitcast %struct.S.1* [[ARRAYIDX5_I]] to i8* +// CHECK1-NEXT: [[TMP48:%.*]] = bitcast %struct.S.1* [[TMP41]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP47]], i8* align 4 [[TMP48]], i64 4, i1 false) #[[ATTR4]] // CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !28 // CHECK1-NEXT: [[ADD6_I:%.*]] = add nsw i32 [[TMP49]], 1 @@ -906,23 +922,23 @@ // CHECK1-NEXT: [[TMP53:%.*]] = bitcast [2 x i32]* [[TMP29]] to i8* // CHECK1-NEXT: [[TMP54:%.*]] = bitcast [2 x i32]* [[TMP39]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP53]], i8* align 4 [[TMP54]], i64 8, i1 false) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN_I:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP31]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP55:%.*]] = bitcast [2 x %struct.S.0]* [[TMP40]] to %struct.S.0* -// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN_I]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN_I:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP31]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP55:%.*]] = bitcast [2 x %struct.S.1]* [[TMP40]] to %struct.S.1* +// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr [[STRUCT_S_1:%.*]], %struct.S.1* [[ARRAY_BEGIN_I]], i64 2 // CHECK1-NEXT: br label [[OMP_ARRAYCPY_BODY_I:%.*]] // CHECK1: omp.arraycpy.body.i: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST_I:%.*]] = phi %struct.S.0* [ [[TMP55]], [[DOTOMP_LASTPRIVATE_THEN_I]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT_I:%.*]], [[OMP_ARRAYCPY_BODY_I]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST_I:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN_I]], [[DOTOMP_LASTPRIVATE_THEN_I]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT_I:%.*]], [[OMP_ARRAYCPY_BODY_I]] ] -// CHECK1-NEXT: [[TMP57:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST_I]] to i8* -// CHECK1-NEXT: [[TMP58:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST_I]] to i8* +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST_I:%.*]] = phi %struct.S.1* [ [[TMP55]], [[DOTOMP_LASTPRIVATE_THEN_I]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT_I:%.*]], [[OMP_ARRAYCPY_BODY_I]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST_I:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN_I]], [[DOTOMP_LASTPRIVATE_THEN_I]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT_I:%.*]], [[OMP_ARRAYCPY_BODY_I]] ] +// CHECK1-NEXT: [[TMP57:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST_I]] to i8* +// CHECK1-NEXT: [[TMP58:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST_I]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP57]], i8* align 4 [[TMP58]], i64 4, i1 false) #[[ATTR4]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT_I]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST_I]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT_I]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST_I]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE_I:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT_I]], [[TMP56]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT_I]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST_I]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT_I]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST_I]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE_I:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT_I]], [[TMP56]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE_I]], label [[OMP_ARRAYCPY_DONE7_I:%.*]], label [[OMP_ARRAYCPY_BODY_I]] // CHECK1: omp.arraycpy.done7.i: -// CHECK1-NEXT: [[TMP59:%.*]] = bitcast %struct.S.0* [[TMP35]] to i8* -// CHECK1-NEXT: [[TMP60:%.*]] = bitcast %struct.S.0* [[TMP41]] to i8* +// CHECK1-NEXT: [[TMP59:%.*]] = bitcast %struct.S.1* [[TMP35]] to i8* +// CHECK1-NEXT: [[TMP60:%.*]] = bitcast %struct.S.1* [[TMP41]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP59]], i8* align 4 [[TMP60]], i64 4, i1 false) #[[ATTR4]] // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__3_EXIT]] // CHECK1: .omp_outlined..3.exit: @@ -930,57 +946,57 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_dup..6 -// CHECK1-SAME: (%struct.kmp_task_t_with_privates.2* noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.2* noundef [[TMP1:%.*]], i32 noundef [[TMP2:%.*]]) #[[ATTR7]] { +// CHECK1-SAME: (%struct.kmp_task_t_with_privates.4* noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.4* noundef [[TMP1:%.*]], i32 noundef [[TMP2:%.*]]) #[[ATTR7]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8 +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca %struct.kmp_task_t_with_privates.4*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.4*, align 8 // CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP0]], %struct.kmp_task_t_with_privates.2** [[DOTADDR]], align 8 -// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP1]], %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 +// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.4* [[TMP0]], %struct.kmp_task_t_with_privates.4** [[DOTADDR]], align 8 +// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.4* [[TMP1]], %struct.kmp_task_t_with_privates.4** [[DOTADDR1]], align 8 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTADDR2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.4*, %struct.kmp_task_t_with_privates.4** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], %struct.kmp_task_t_with_privates.4* [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 8 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTADDR2]], align 4 // CHECK1-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 64 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP7]], i32 0, i32 2 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP8]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4]], %struct.kmp_task_t_with_privates.4* [[TMP3]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5:%.*]], %struct..kmp_privates.t.5* [[TMP7]], i32 0, i32 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP8]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: -// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP7]], i32 0, i32 3 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP9]]) +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], %struct..kmp_privates.t.5* [[TMP7]], i32 0, i32 3 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP9]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_destructor..7 -// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.2* noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.4* noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.4*, align 8 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP1]], %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP2]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP3]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP3]], i32 0, i32 3 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP5]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.4* [[TMP1]], %struct.kmp_task_t_with_privates.4** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.kmp_task_t_with_privates.4*, %struct.kmp_task_t_with_privates.4** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], %struct.kmp_task_t_with_privates.4* [[TMP2]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5:%.*]], %struct..kmp_privates.t.5* [[TMP3]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], %struct..kmp_privates.t.5* [[TMP3]], i32 0, i32 3 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP5]]) #[[ATTR4]] +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done2: // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[RETVAL]], align 4 @@ -988,46 +1004,46 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, i32* [[F]], align 4 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -1042,46 +1058,49 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK3-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK3-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK3-NEXT: store double* @g, double** [[TMP4]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK3-NEXT: store i32* @_ZZ4mainE5sivar, i32** [[TMP5]], align 8 -// CHECK3-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 96, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK3-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to %struct.kmp_task_t_with_privates* -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP7]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 [[TMP11]], i64 16, i1 false) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP7]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 5 -// CHECK3-NEXT: store i64 0, i64* [[TMP13]], align 8 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 6 -// CHECK3-NEXT: store i64 9, i64* [[TMP14]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 7 -// CHECK3-NEXT: store i64 1, i64* [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 9 -// CHECK3-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i8* -// CHECK3-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP17]], i8 0, i64 8, i1 false) -// CHECK3-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP15]], align 8 -// CHECK3-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP6]], i32 1, i64* [[TMP13]], i64* [[TMP14]], i64 [[TMP18]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates*, i32)* @.omp_task_dup. to i8*)) -// CHECK3-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK3-NEXT: store double* @g, double** [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK3-NEXT: store i32* @_ZZ4mainE5sivar, i32** [[TMP6]], align 8 +// CHECK3-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP7:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i64 96, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.kmp_task_t_with_privates* +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP8]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP9]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i8*, i8** [[TMP10]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false) +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP8]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP9]], i32 0, i32 5 +// CHECK3-NEXT: store i64 0, i64* [[TMP14]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP9]], i32 0, i32 6 +// CHECK3-NEXT: store i64 9, i64* [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP9]], i32 0, i32 7 +// CHECK3-NEXT: store i64 1, i64* [[TMP16]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP9]], i32 0, i32 9 +// CHECK3-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i8* +// CHECK3-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP18]], i8 0, i64 8, i1 false) +// CHECK3-NEXT: [[TMP19:%.*]] = load i64, i64* [[TMP16]], align 8 +// CHECK3-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* [[TMP7]], i32 1, i64* [[TMP14]], i64* [[TMP15]], i64 [[TMP19]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates*, i32)* @.omp_task_dup. to i8*)) +// CHECK3-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: ret void @@ -1119,12 +1138,12 @@ // CHECK3-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK3-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // CHECK3-NEXT: [[DOTLASTPRIV_PTR_ADDR_I:%.*]] = alloca double*, align 8 // CHECK3-NEXT: [[DOTLASTPRIV_PTR_ADDR1_I:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[I_I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP_I:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK3-NEXT: [[REF_TMP_I:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8 // CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 @@ -1135,7 +1154,7 @@ // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK3-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -1164,15 +1183,15 @@ // CHECK3-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 // CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // CHECK3-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 -// CHECK3-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK3-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: [[TMP22:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK3-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK3-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK3-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, double**, i32**)* // CHECK3-NEXT: call void [[TMP25]](i8* [[TMP24]], double** [[DOTLASTPRIV_PTR_ADDR_I]], i32** [[DOTLASTPRIV_PTR_ADDR1_I]]) #[[ATTR3:[0-9]+]] -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP22]], i32 0, i32 0 // CHECK3-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP26]], align 8 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP22]], i32 0, i32 1 // CHECK3-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP28]], align 8 // CHECK3-NEXT: [[TMP30:%.*]] = load double*, double** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 // CHECK3-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 @@ -1191,11 +1210,11 @@ // CHECK3-NEXT: store i32 [[TMP35]], i32* [[I_I]], align 4, !noalias !14 // CHECK3-NEXT: store double 1.000000e+00, double* [[TMP30]], align 8 // CHECK3-NEXT: store i32 11, i32* [[TMP31]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP_I]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP_I]], i32 0, i32 0 // CHECK3-NEXT: store double* [[TMP30]], double** [[TMP36]], align 8, !noalias !14 -// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP_I]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP_I]], i32 0, i32 1 // CHECK3-NEXT: store i32* [[TMP31]], i32** [[TMP37]], align 8, !noalias !14 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(16) [[REF_TMP_I]]) #[[ATTR3]] +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 8 dereferenceable(16) [[REF_TMP_I]]) #[[ATTR3]] // CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK3-NEXT: [[ADD3_I:%.*]] = add nsw i32 [[TMP38]], 1 // CHECK3-NEXT: store i32 [[ADD3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 @@ -1248,54 +1267,58 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>*, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* // CHECK4-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>** [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK4-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK4-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK4-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK4-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK4-NEXT: store double* @g, double** [[TMP4]], align 8 -// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK4-NEXT: store i32* @_ZZ4mainE5sivar, i32** [[TMP5]], align 8 -// CHECK4-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 96, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK4-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to %struct.kmp_task_t_with_privates* -// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP7]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 -// CHECK4-NEXT: [[TMP11:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 [[TMP11]], i64 16, i1 false) -// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP7]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 5 -// CHECK4-NEXT: store i64 0, i64* [[TMP13]], align 8 -// CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 6 -// CHECK4-NEXT: store i64 9, i64* [[TMP14]], align 8 -// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 7 -// CHECK4-NEXT: store i64 1, i64* [[TMP15]], align 8 -// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 9 -// CHECK4-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i8* -// CHECK4-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP17]], i8 0, i64 8, i1 false) -// CHECK4-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP15]], align 8 -// CHECK4-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP6]], i32 1, i64* [[TMP13]], i64* [[TMP14]], i64 [[TMP18]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates*, i32)* @.omp_task_dup. to i8*)) -// CHECK4-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK4-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK4-NEXT: store double* @g, double** [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK4-NEXT: store i32* @_ZZ4mainE5sivar, i32** [[TMP6]], align 8 +// CHECK4-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: [[TMP7:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i64 96, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK4-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.kmp_task_t_with_privates* +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP8]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP9]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP11:%.*]] = load i8*, i8** [[TMP10]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8* +// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false) +// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP8]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP9]], i32 0, i32 5 +// CHECK4-NEXT: store i64 0, i64* [[TMP14]], align 8 +// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP9]], i32 0, i32 6 +// CHECK4-NEXT: store i64 9, i64* [[TMP15]], align 8 +// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP9]], i32 0, i32 7 +// CHECK4-NEXT: store i64 1, i64* [[TMP16]], align 8 +// CHECK4-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP9]], i32 0, i32 9 +// CHECK4-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i8* +// CHECK4-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP18]], i8 0, i64 8, i1 false) +// CHECK4-NEXT: [[TMP19:%.*]] = load i64, i64* [[TMP16]], align 8 +// CHECK4-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* [[TMP7]], i32 1, i64* [[TMP14]], i64* [[TMP15]], i64 [[TMP19]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates*, i32)* @.omp_task_dup. to i8*)) +// CHECK4-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: // CHECK4-NEXT: ret void @@ -1348,7 +1371,7 @@ // CHECK4-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK4-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK4-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // CHECK4-NEXT: [[DOTLASTPRIV_PTR_ADDR_I:%.*]] = alloca double*, align 8 // CHECK4-NEXT: [[DOTLASTPRIV_PTR_ADDR1_I:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[I_I:%.*]] = alloca i32, align 4 @@ -1364,7 +1387,7 @@ // CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK4-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK4-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK4-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK4-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK4-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -1393,15 +1416,15 @@ // CHECK4-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 // CHECK4-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // CHECK4-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 -// CHECK4-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK4-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: [[TMP22:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK4-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK4-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK4-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, double**, i32**)* // CHECK4-NEXT: call void [[TMP25]](i8* [[TMP24]], double** [[DOTLASTPRIV_PTR_ADDR_I]], i32** [[DOTLASTPRIV_PTR_ADDR1_I]]) #[[ATTR4:[0-9]+]] -// CHECK4-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP22]], i32 0, i32 0 // CHECK4-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP26]], align 8 -// CHECK4-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP22]], i32 0, i32 1 // CHECK4-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP28]], align 8 // CHECK4-NEXT: [[TMP30:%.*]] = load double*, double** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 // CHECK4-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 @@ -1485,67 +1508,74 @@ // CHECK5-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 // CHECK5-NEXT: [[S_ADDR:%.*]] = alloca %struct.St*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK5-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK5-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK5-NEXT: store %struct.St* [[S]], %struct.St** [[S_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK5-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, float**, %struct.St**)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]], float** [[A_ADDR]], %struct.St** [[S_ADDR]]) +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i64 [[TMP1]], i64* [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store float** [[A_ADDR]], float*** [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store %struct.St** [[S_ADDR]], %struct.St*** [[TMP4]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], %struct.St** noundef nonnull align 8 dereferenceable(8) [[S:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK5-NEXT: [[S_ADDR:%.*]] = alloca %struct.St**, align 8 -// CHECK5-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK5-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK5-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK5-NEXT: store %struct.St** [[S]], %struct.St*** [[S_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.St**, %struct.St*** [[S_ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK5-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK5-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load %struct.St**, %struct.St*** [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK5-NEXT: br i1 [[TMP10]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK5: omp_if.then: -// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK5-NEXT: store i64 [[TMP0]], i64* [[TMP7]], align 8 -// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK5-NEXT: store float** [[TMP1]], float*** [[TMP8]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK5-NEXT: store %struct.St** [[TMP2]], %struct.St*** [[TMP9]], align 8 -// CHECK5-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP10:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 1, i64 96, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK5-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to %struct.kmp_task_t_with_privates* -// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP11]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP12]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 8 -// CHECK5-NEXT: [[TMP15:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 24, i1 false) -// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP11]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP12]], i32 0, i32 5 -// CHECK5-NEXT: store i64 0, i64* [[TMP17]], align 8 -// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP12]], i32 0, i32 6 -// CHECK5-NEXT: store i64 9, i64* [[TMP18]], align 8 -// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP12]], i32 0, i32 7 -// CHECK5-NEXT: store i64 1, i64* [[TMP19]], align 8 -// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP12]], i32 0, i32 9 -// CHECK5-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i8* -// CHECK5-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP21]], i8 0, i64 8, i1 false) -// CHECK5-NEXT: [[TMP22:%.*]] = load i64, i64* [[TMP19]], align 8 -// CHECK5-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i8* [[TMP10]], i32 1, i64* [[TMP17]], i64* [[TMP18]], i64 [[TMP22]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates*, i32)* @.omp_task_dup. to i8*)) -// CHECK5-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK5-NEXT: store i64 [[TMP2]], i64* [[TMP11]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK5-NEXT: store float** [[TMP4]], float*** [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK5-NEXT: store %struct.St** [[TMP6]], %struct.St*** [[TMP13]], align 8 +// CHECK5-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP14:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 1, i64 96, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK5-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to %struct.kmp_task_t_with_privates* +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP15]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP16]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP18:%.*]] = load i8*, i8** [[TMP17]], align 8 +// CHECK5-NEXT: [[TMP19:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP18]], i8* align 8 [[TMP19]], i64 24, i1 false) +// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP15]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP16]], i32 0, i32 5 +// CHECK5-NEXT: store i64 0, i64* [[TMP21]], align 8 +// CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP16]], i32 0, i32 6 +// CHECK5-NEXT: store i64 9, i64* [[TMP22]], align 8 +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP16]], i32 0, i32 7 +// CHECK5-NEXT: store i64 1, i64* [[TMP23]], align 8 +// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP16]], i32 0, i32 9 +// CHECK5-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i8* +// CHECK5-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP25]], i8 0, i64 8, i1 false) +// CHECK5-NEXT: [[TMP26:%.*]] = load i64, i64* [[TMP23]], align 8 +// CHECK5-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i8* [[TMP14]], i32 1, i64* [[TMP21]], i64* [[TMP22]], i64 [[TMP26]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates*, i32)* @.omp_task_dup. to i8*)) +// CHECK5-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: br label [[OMP_IF_END]] // CHECK5: omp_if.end: // CHECK5-NEXT: ret void @@ -1583,7 +1613,7 @@ // CHECK5-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK5-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // CHECK5-NEXT: [[DOTLASTPRIV_PTR_ADDR_I:%.*]] = alloca float**, align 8 // CHECK5-NEXT: [[DOTLASTPRIV_PTR_ADDR1_I:%.*]] = alloca %struct.St**, align 8 // CHECK5-NEXT: [[I_I:%.*]] = alloca i32, align 4 @@ -1598,7 +1628,7 @@ // CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK5-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK5-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK5-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK5-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK5-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -1627,17 +1657,17 @@ // CHECK5-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 // CHECK5-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // CHECK5-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 -// CHECK5-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK5-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 0 +// CHECK5-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK5-NEXT: [[TMP22:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP22]], i32 0, i32 0 // CHECK5-NEXT: [[TMP24:%.*]] = load i64, i64* [[TMP23]], align 8 // CHECK5-NEXT: [[TMP25:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK5-NEXT: [[TMP26:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK5-NEXT: [[TMP27:%.*]] = bitcast void (i8*, ...)* [[TMP25]] to void (i8*, float***, %struct.St***)* // CHECK5-NEXT: call void [[TMP27]](i8* [[TMP26]], float*** [[DOTLASTPRIV_PTR_ADDR_I]], %struct.St*** [[DOTLASTPRIV_PTR_ADDR1_I]]) #[[ATTR2:[0-9]+]] -// CHECK5-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP22]], i32 0, i32 1 // CHECK5-NEXT: [[TMP29:%.*]] = load float**, float*** [[TMP28]], align 8 -// CHECK5-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP22]], i32 0, i32 2 // CHECK5-NEXT: [[TMP31:%.*]] = load %struct.St**, %struct.St*** [[TMP30]], align 8 // CHECK5-NEXT: [[TMP32:%.*]] = load float**, float*** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 // CHECK5-NEXT: [[TMP33:%.*]] = load %struct.St**, %struct.St*** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 @@ -1694,52 +1724,57 @@ // CHECK6-SAME: () #[[ATTR0:[0-9]+]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[I]]) +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store i32* [[I]], i32** [[TMP0]], align 8 +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK6-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 -// CHECK6-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK6-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK6-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK6-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK6-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK6-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK6-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK6-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK6: omp_if.then: -// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK6-NEXT: store i32* [[TMP0]], i32** [[TMP5]], align 8 -// CHECK6-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK6-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i64 88, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK6-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to %struct.kmp_task_t_with_privates* -// CHECK6-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP7]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 -// CHECK6-NEXT: [[TMP11:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 [[TMP11]], i64 8, i1 false) -// CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP7]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 5 -// CHECK6-NEXT: store i64 0, i64* [[TMP13]], align 8 -// CHECK6-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 6 -// CHECK6-NEXT: store i64 9, i64* [[TMP14]], align 8 -// CHECK6-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 7 -// CHECK6-NEXT: store i64 1, i64* [[TMP15]], align 8 -// CHECK6-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 9 -// CHECK6-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i8* -// CHECK6-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP17]], i8 0, i64 8, i1 false) -// CHECK6-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP15]], align 8 -// CHECK6-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* [[TMP6]], i32 1, i64* [[TMP13]], i64* [[TMP14]], i64 [[TMP18]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates*, i32)* @.omp_task_dup. to i8*)) -// CHECK6-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK6-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK6-NEXT: store i32* [[TMP2]], i32** [[TMP7]], align 8 +// CHECK6-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK6-NEXT: [[TMP8:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 1, i64 88, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK6-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to %struct.kmp_task_t_with_privates* +// CHECK6-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP9]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP12:%.*]] = load i8*, i8** [[TMP11]], align 8 +// CHECK6-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8* +// CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP12]], i8* align 8 [[TMP13]], i64 8, i1 false) +// CHECK6-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP9]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 5 +// CHECK6-NEXT: store i64 0, i64* [[TMP15]], align 8 +// CHECK6-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 6 +// CHECK6-NEXT: store i64 9, i64* [[TMP16]], align 8 +// CHECK6-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 7 +// CHECK6-NEXT: store i64 1, i64* [[TMP17]], align 8 +// CHECK6-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 9 +// CHECK6-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i8* +// CHECK6-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP19]], i8 0, i64 8, i1 false) +// CHECK6-NEXT: [[TMP20:%.*]] = load i64, i64* [[TMP17]], align 8 +// CHECK6-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i8* [[TMP8]], i32 1, i64* [[TMP15]], i64* [[TMP16]], i64 [[TMP20]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates*, i32)* @.omp_task_dup. to i8*)) +// CHECK6-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK6-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK6-NEXT: br label [[OMP_IF_END]] // CHECK6: omp_if.end: // CHECK6-NEXT: ret void @@ -1772,7 +1807,7 @@ // CHECK6-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK6-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK6-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK6-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // CHECK6-NEXT: [[DOTLASTPRIV_PTR_ADDR_I:%.*]] = alloca i32*, align 8 // CHECK6-NEXT: [[I_I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 @@ -1786,7 +1821,7 @@ // CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK6-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK6-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK6-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK6-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK6-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK6-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -1815,13 +1850,13 @@ // CHECK6-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 // CHECK6-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // CHECK6-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 -// CHECK6-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK6-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK6-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK6-NEXT: [[TMP22:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK6-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK6-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK6-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, i32**)* // CHECK6-NEXT: call void [[TMP25]](i8* [[TMP24]], i32** [[DOTLASTPRIV_PTR_ADDR_I]]) #[[ATTR2:[0-9]+]] -// CHECK6-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP22]], i32 0, i32 0 // CHECK6-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP26]], align 8 // CHECK6-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 // CHECK6-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 diff --git a/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp --- a/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp @@ -60,14 +60,13 @@ // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED7:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED10:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_5:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) @@ -76,91 +75,97 @@ // CHECK1-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED2]] to i32* -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED2]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_2]]) // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_4]], align 1 +// CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_3]], align 1 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_4]], align 1 -// CHECK1-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP9]] to i1 -// CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED7]] to i8* -// CHECK1-NEXT: [[FROMBOOL9:%.*]] = zext i1 [[TOBOOL6]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL9]], i8* [[CONV8]], align 1 -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED7]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK1-NEXT: [[CONV11:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED10]] to i32* -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[CONV11]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED10]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_4]], align 1 -// CHECK1-NEXT: [[TOBOOL12:%.*]] = trunc i8 [[TMP13]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL12]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[I]], i32** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[ARGC_ADDR]], i32** [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 2 +// CHECK1-NEXT: store i8*** [[ARGV_ADDR]], i8**** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP13:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_3]], align 1 +// CHECK1-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK1-NEXT: [[FROMBOOL7:%.*]] = zext i1 [[TOBOOL6]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL7]], i8* [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_3]], align 1 +// CHECK1-NEXT: [[TOBOOL8:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL8]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i8***, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* [[I]], i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32* [[I]], i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP14]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP17]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 33, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates* -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP6]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 4 -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %union.kmp_cmplrdata_t* [[TMP8]] to i32* -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, i64* [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 6 -// CHECK1-NEXT: store i64 9, i64* [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, i64* [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 9 -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i8* -// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP14]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP12]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP5]], i32 1, i64* [[TMP10]], i64* [[TMP11]], i64 [[TMP15]], i32 1, i32 0, i64 0, i8* null) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 33, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to %struct.kmp_task_t_with_privates* +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP9]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast %union.kmp_cmplrdata_t* [[TMP11]] to i32* +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, i64* [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 6 +// CHECK1-NEXT: store i64 9, i64* [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, i64* [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 9 +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i8* +// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP17]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP15]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i8* [[TMP8]], i32 1, i64* [[TMP13]], i64* [[TMP14]], i64 [[TMP18]], i32 1, i32 0, i64 0, i8* null) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -179,7 +184,7 @@ // CHECK1-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[I_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 @@ -192,7 +197,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 // CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP10]], align 8 @@ -219,8 +224,8 @@ // CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // CHECK1-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP21]] to i32 // CHECK1-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 @@ -243,47 +248,51 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.1*)* @.omp_task_entry..4 to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates.1* -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], %struct.kmp_task_t_with_privates.1* [[TMP6]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, i64* [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 6 -// CHECK1-NEXT: store i64 9, i64* [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, i64* [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 9 -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i8* -// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP12]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = zext i32 [[TMP4]] to i64 -// CHECK1-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP5]], i32 1, i64* [[TMP8]], i64* [[TMP9]], i64 [[TMP13]], i32 1, i32 1, i64 [[TMP14]], i8* null) -// CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 1, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.3*)* @.omp_task_entry..4 to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to %struct.kmp_task_t_with_privates.3* +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], %struct.kmp_task_t_with_privates.3* [[TMP9]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, i64* [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 6 +// CHECK1-NEXT: store i64 9, i64* [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, i64* [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 9 +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i8* +// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP15]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK1-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i8* [[TMP8]], i32 1, i64* [[TMP11]], i64* [[TMP12]], i64 [[TMP16]], i32 1, i32 1, i64 [[TMP17]], i8* null) +// CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..4 -// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.1* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.3* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 @@ -295,21 +304,21 @@ // CHECK1-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[I_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.1*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.3*, align 8 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.1* [[TMP1]], %struct.kmp_task_t_with_privates.1** [[DOTADDR1]], align 8 +// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.3* [[TMP1]], %struct.kmp_task_t_with_privates.3** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.1*, %struct.kmp_task_t_with_privates.1** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], %struct.kmp_task_t_with_privates.1* [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.3*, %struct.kmp_task_t_with_privates.3** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], %struct.kmp_task_t_with_privates.3* [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.1* [[TMP3]] to i8* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.2* +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.3* [[TMP3]] to i8* // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 // CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP10]], align 8 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6 @@ -335,8 +344,8 @@ // CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !31 // CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !31 // CHECK1-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !31 -// CHECK1-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !31 -// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !31 +// CHECK1-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !31 +// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !31 // CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !31 // CHECK1-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP21]] to i32 // CHECK1-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !31 @@ -359,103 +368,107 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i8*** noundef nonnull align 8 dereferenceable(8) [[ARGV:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca i8***, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i8*** [[ARGV]], i8**** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], i64* [[DOTCAPTURE_EXPR__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i8***, i8**** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8***, i8**** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP7]], align 8 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store i32* [[TMP0]], i32** [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[TMP1]], i32** [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: store i8*** [[TMP2]], i8**** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV3]], align 4 -// CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i8**, i8*** [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP13]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, i8* [[TMP15]], i64 [[IDXPROM8]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i8, i8* [[ARRAYIDX9]], align 1 -// CHECK1-NEXT: [[CONV10:%.*]] = sext i8 [[TMP17]] to i32 -// CHECK1-NEXT: store i32 [[CONV10]], i32* [[DOTCAPTURE_EXPR_7]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP18]], 0 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[TMP2]], i32** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[TMP4]], i32** [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: store i8*** [[TMP6]], i8**** [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i8**, i8*** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP21]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP23]], i64 [[IDXPROM6]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP25]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP26]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[CONV12:%.*]] = sext i32 [[DIV]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_7]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK1-NEXT: [[SUB13:%.*]] = sub i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: [[SUB14:%.*]] = sub i32 [[SUB13]], 1 -// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB14]], 1 -// CHECK1-NEXT: [[DIV15:%.*]] = udiv i32 [[ADD]], 1 -// CHECK1-NEXT: [[CONV16:%.*]] = zext i32 [[DIV15]] to i64 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV12]], [[CONV16]] -// CHECK1-NEXT: [[SUB17:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK1-NEXT: store i64 [[SUB17]], i64* [[DOTCAPTURE_EXPR_11]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 1, i64 88, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.3*)* @.omp_task_entry..7 to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP21]] to %struct.kmp_task_t_with_privates.3* -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], %struct.kmp_task_t_with_privates.3* [[TMP22]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP23]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP25:%.*]] = load i8*, i8** [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = bitcast %struct.anon.2* [[AGG_CAPTURED]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP25]], i8* align 8 [[TMP26]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], %struct.kmp_task_t_with_privates.3* [[TMP22]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP28:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP28]] to i1 -// CHECK1-NEXT: [[TMP29:%.*]] = sext i1 [[TOBOOL]] to i32 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP23]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, i64* [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP23]], i32 0, i32 6 -// CHECK1-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_11]], align 8 -// CHECK1-NEXT: store i64 [[TMP32]], i64* [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP23]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, i64* [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP23]], i32 0, i32 9 -// CHECK1-NEXT: [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i8* -// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP35]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP36:%.*]] = load i64, i64* [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i8* [[TMP21]], i32 [[TMP29]], i64* [[TMP30]], i64* [[TMP31]], i64 [[TMP36]], i32 1, i32 2, i64 [[TMP37]], i8* bitcast (void (%struct.kmp_task_t_with_privates.3*, %struct.kmp_task_t_with_privates.3*, i32)* @.omp_task_dup. to i8*)) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[CONV9:%.*]] = sext i32 [[DIV]] to i64 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB11]], 1 +// CHECK1-NEXT: [[DIV12:%.*]] = udiv i32 [[ADD]], 1 +// CHECK1-NEXT: [[CONV13:%.*]] = zext i32 [[DIV12]] to i64 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV9]], [[CONV13]] +// CHECK1-NEXT: [[SUB14:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK1-NEXT: store i64 [[SUB14]], i64* [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 1, i64 88, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.6*)* @.omp_task_entry..7 to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP30:%.*]] = bitcast i8* [[TMP29]] to %struct.kmp_task_t_with_privates.6* +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], %struct.kmp_task_t_with_privates.6* [[TMP30]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP31]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP33:%.*]] = load i8*, i8** [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast %struct.anon.5* [[AGG_CAPTURED]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP33]], i8* align 8 [[TMP34]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6]], %struct.kmp_task_t_with_privates.6* [[TMP30]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP36:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL15:%.*]] = trunc i8 [[TMP36]] to i1 +// CHECK1-NEXT: [[TMP37:%.*]] = sext i1 [[TOBOOL15]] to i32 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP31]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, i64* [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP31]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP40:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: store i64 [[TMP40]], i64* [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP31]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, i64* [[TMP41]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP31]], i32 0, i32 9 +// CHECK1-NEXT: [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i8* +// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP43]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP44:%.*]] = load i64, i64* [[TMP41]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i8* [[TMP29]], i32 [[TMP37]], i64* [[TMP38]], i64* [[TMP39]], i64 [[TMP44]], i32 1, i32 2, i64 [[TMP45]], i8* bitcast (void (%struct.kmp_task_t_with_privates.6*, %struct.kmp_task_t_with_privates.6*, i32)* @.omp_task_dup. to i8*)) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -476,7 +489,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..7 -// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.3* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.6* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 @@ -488,7 +501,7 @@ // CHECK1-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.5*, align 8 // CHECK1-NEXT: [[DOTLASTPRIV_PTR_ADDR_I:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_2_I:%.*]] = alloca i32, align 4 @@ -500,19 +513,19 @@ // CHECK1-NEXT: [[J15_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.3*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.6*, align 8 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.3* [[TMP1]], %struct.kmp_task_t_with_privates.3** [[DOTADDR1]], align 8 +// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.6* [[TMP1]], %struct.kmp_task_t_with_privates.6** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.3*, %struct.kmp_task_t_with_privates.3** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], %struct.kmp_task_t_with_privates.3* [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.6*, %struct.kmp_task_t_with_privates.6** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], %struct.kmp_task_t_with_privates.6* [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.2* -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], %struct.kmp_task_t_with_privates.3* [[TMP3]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.5* +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6]], %struct.kmp_task_t_with_privates.6* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.3* [[TMP3]] to i8* +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.6* [[TMP3]] to i8* // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 // CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8 // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6 @@ -538,33 +551,33 @@ // CHECK1-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !47 // CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !47 // CHECK1-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !47 -// CHECK1-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !47 -// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !47 +// CHECK1-NEXT: store %struct.anon.5* [[TMP8]], %struct.anon.5** [[__CONTEXT_ADDR_I]], align 8, !noalias !47 +// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR_I]], align 8, !noalias !47 // CHECK1-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !47 // CHECK1-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !47 // CHECK1-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, i32**)* // CHECK1-NEXT: call void [[TMP25]](i8* [[TMP24]], i32** [[DOTLASTPRIV_PTR_ADDR_I]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP22]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP22]], i32 0, i32 0 // CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP26]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !47 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP22]], i32 0, i32 1 // CHECK1-NEXT: [[TMP30:%.*]] = load i32*, i32** [[TMP29]], align 8 // CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 // CHECK1-NEXT: store i32 [[TMP31]], i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !47 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP22]], i32 0, i32 1 // CHECK1-NEXT: [[TMP33:%.*]] = load i32*, i32** [[TMP32]], align 8 // CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 // CHECK1-NEXT: store i32 [[TMP34]], i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47 -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP22]], i32 0, i32 2 // CHECK1-NEXT: [[TMP36:%.*]] = load i8***, i8**** [[TMP35]], align 8 // CHECK1-NEXT: [[TMP37:%.*]] = load i8**, i8*** [[TMP36]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP22]], i32 0, i32 1 // CHECK1-NEXT: [[TMP39:%.*]] = load i32*, i32** [[TMP38]], align 8 // CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 // CHECK1-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[TMP40]] to i64 // CHECK1-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i8*, i8** [[TMP37]], i64 [[IDXPROM_I]] // CHECK1-NEXT: [[TMP41:%.*]] = load i8*, i8** [[ARRAYIDX_I]], align 8 -// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP22]], i32 0, i32 1 // CHECK1-NEXT: [[TMP43:%.*]] = load i32*, i32** [[TMP42]], align 8 // CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 // CHECK1-NEXT: [[IDXPROM4_I:%.*]] = sext i32 [[TMP44]] to i64 @@ -596,9 +609,9 @@ // CHECK1: taskloop.if.then.i: // CHECK1-NEXT: [[TMP53:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !47 // CHECK1-NEXT: store i64 [[TMP53]], i64* [[DOTOMP_IV_I]], align 8, !noalias !47 -// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP22]], i32 0, i32 1 // CHECK1-NEXT: [[TMP55:%.*]] = load i32*, i32** [[TMP54]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP22]], i32 0, i32 2 // CHECK1-NEXT: [[TMP57:%.*]] = load i8***, i8**** [[TMP56]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK1: omp.inner.for.cond.i: @@ -653,20 +666,20 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_dup. -// CHECK1-SAME: (%struct.kmp_task_t_with_privates.3* noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.3* noundef [[TMP1:%.*]], i32 noundef [[TMP2:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (%struct.kmp_task_t_with_privates.6* noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.6* noundef [[TMP1:%.*]], i32 noundef [[TMP2:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca %struct.kmp_task_t_with_privates.3*, align 8 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.3*, align 8 +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca %struct.kmp_task_t_with_privates.6*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.6*, align 8 // CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.3* [[TMP0]], %struct.kmp_task_t_with_privates.3** [[DOTADDR]], align 8 -// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.3* [[TMP1]], %struct.kmp_task_t_with_privates.3** [[DOTADDR1]], align 8 +// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.6* [[TMP0]], %struct.kmp_task_t_with_privates.6** [[DOTADDR]], align 8 +// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.6* [[TMP1]], %struct.kmp_task_t_with_privates.6** [[DOTADDR1]], align 8 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTADDR2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.3*, %struct.kmp_task_t_with_privates.3** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], %struct.kmp_task_t_with_privates.3* [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.6*, %struct.kmp_task_t_with_privates.6** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], %struct.kmp_task_t_with_privates.6* [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 8 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTADDR2]], align 4 // CHECK1-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], %struct.kmp_task_t_with_privates.3* [[TMP3]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6]], %struct.kmp_task_t_with_privates.6* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: ret void // // @@ -696,7 +709,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[C]], i32* [[C_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 @@ -704,89 +717,96 @@ // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP1]] to i1 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.S* [[THIS1]], %struct.S** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[C_ADDR]], i32** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK1-NEXT: [[FROMBOOL3:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL3]], i8* [[CONV]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S*, i32*, i64)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.S* [[THIS1]], i32* [[C_ADDR]], i64 [[TMP2]]) +// CHECK1-NEXT: store i8 [[FROMBOOL3]], i8* [[TMP3]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S* noundef [[THIS:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK1-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 8 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK1-NEXT: br i1 [[TMP10]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store %struct.S* [[TMP0]], %struct.S** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[TMP1]], i32** [[TMP7]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 -// CHECK1-NEXT: store i32* [[TMP]], i32** [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[TMP4]], i32** [[TMP12]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK1-NEXT: store i32* [[TMP]], i32** [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP15]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = select i1 [[TOBOOL]], i32 2, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = or i32 [[TMP11]], 1 -// CHECK1-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 [[TMP12]], i64 80, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.5*)* @.omp_task_entry..10 to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to %struct.kmp_task_t_with_privates.5* -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], %struct.kmp_task_t_with_privates.5* [[TMP14]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP15]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = bitcast %struct.anon.4* [[AGG_CAPTURED]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 16, i1 false) -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP15]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, i64* [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP15]], i32 0, i32 6 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: [[CONV5:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: store i64 [[CONV5]], i64* [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP15]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, i64* [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP15]], i32 0, i32 9 -// CHECK1-NEXT: [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i8* -// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP24]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, i64* [[TMP22]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i8* [[TMP13]], i32 1, i64* [[TMP19]], i64* [[TMP20]], i64 [[TMP25]], i32 1, i32 2, i64 4, i8* null) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: [[SUB5:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB5]], i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = select i1 [[TOBOOL1]], i32 2, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 [[TMP17]], i64 80, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.9*)* @.omp_task_entry..10 to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP18]] to %struct.kmp_task_t_with_privates.9* +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_9:%.*]], %struct.kmp_task_t_with_privates.9* [[TMP19]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast %struct.anon.8* [[AGG_CAPTURED]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP22]], i8* align 8 [[TMP23]], i64 16, i1 false) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, i64* [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK1-NEXT: store i64 [[CONV]], i64* [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, i64* [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 9 +// CHECK1-NEXT: [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i8* +// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP29]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP30:%.*]] = load i64, i64* [[TMP27]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i8* [[TMP18]], i32 1, i64* [[TMP24]], i64* [[TMP25]], i64 [[TMP30]], i32 1, i32 2, i64 4, i8* null) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..10 -// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.5* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.9* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 @@ -798,7 +818,7 @@ // CHECK1-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.8*, align 8 // CHECK1-NEXT: [[TMP_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP1_I:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__I:%.*]] = alloca i32, align 4 @@ -809,17 +829,17 @@ // CHECK1-NEXT: [[TMP6_I:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.5*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.9*, align 8 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.5* [[TMP1]], %struct.kmp_task_t_with_privates.5** [[DOTADDR1]], align 8 +// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.9* [[TMP1]], %struct.kmp_task_t_with_privates.9** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.5*, %struct.kmp_task_t_with_privates.5** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], %struct.kmp_task_t_with_privates.5* [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.9*, %struct.kmp_task_t_with_privates.9** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_9:%.*]], %struct.kmp_task_t_with_privates.9* [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.4* -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.5* [[TMP3]] to i8* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.8* +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.9* [[TMP3]] to i8* // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 // CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP10]], align 8 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6 @@ -845,12 +865,12 @@ // CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !62 // CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !62 // CHECK1-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !62 -// CHECK1-NEXT: store %struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !62 -// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !62 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP20]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.anon.8* [[TMP8]], %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !62 +// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !62 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP20]], i32 0, i32 0 // CHECK1-NEXT: [[TMP22:%.*]] = load %struct.S*, %struct.S** [[TMP21]], align 8 // CHECK1-NEXT: store i32* [[TMP_I]], i32** [[TMP1_I]], align 8, !noalias !62 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP20]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP20]], i32 0, i32 1 // CHECK1-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP23]], align 8 // CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 // CHECK1-NEXT: store i32 [[TMP25]], i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !62 @@ -868,7 +888,7 @@ // CHECK1-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !62 // CHECK1-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP29]] to i32 // CHECK1-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !62 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP20]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP20]], i32 0, i32 1 // CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[TMP30]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK1: omp.inner.for.cond.i: @@ -905,14 +925,13 @@ // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i8, align 1 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED7:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED10:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i8, align 1 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_5:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) @@ -921,91 +940,97 @@ // CHECK2-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[CONV]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP3]]) +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED2]] to i32* -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED2]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP6]]) +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 4 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_2]]) // CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 // CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK2-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_4]], align 1 +// CHECK2-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_3]], align 1 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_4]], align 1 -// CHECK2-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP9]] to i1 -// CHECK2-NEXT: [[CONV8:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED7]] to i8* -// CHECK2-NEXT: [[FROMBOOL9:%.*]] = zext i1 [[TOBOOL6]] to i8 -// CHECK2-NEXT: store i8 [[FROMBOOL9]], i8* [[CONV8]], align 1 -// CHECK2-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED7]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK2-NEXT: [[CONV11:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED10]] to i32* -// CHECK2-NEXT: store i32 [[TMP11]], i32* [[CONV11]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED10]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_4]], align 1 -// CHECK2-NEXT: [[TOBOOL12:%.*]] = trunc i8 [[TMP13]] to i1 -// CHECK2-NEXT: br i1 [[TOBOOL12]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK2-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 0 +// CHECK2-NEXT: store i32* [[I]], i32** [[TMP9]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 1 +// CHECK2-NEXT: store i32* [[ARGC_ADDR]], i32** [[TMP10]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 2 +// CHECK2-NEXT: store i8*** [[ARGV_ADDR]], i8**** [[TMP11]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP13:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_3]], align 1 +// CHECK2-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK2-NEXT: [[FROMBOOL7:%.*]] = zext i1 [[TOBOOL6]] to i8 +// CHECK2-NEXT: store i8 [[FROMBOOL7]], i8* [[TMP12]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_3]], align 1 +// CHECK2-NEXT: [[TOBOOL8:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK2-NEXT: br i1 [[TOBOOL8]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i8***, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* [[I]], i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]]) // CHECK2-NEXT: br label [[OMP_IF_END:%.*]] // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32* [[I]], i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR2:[0-9]+]] +// CHECK2-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]]) #[[ATTR2:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK2-NEXT: ret i32 [[TMP14]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK2-NEXT: ret i32 [[TMP17]] // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK2-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 33, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates* -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 4 -// CHECK2-NEXT: [[TMP9:%.*]] = bitcast %union.kmp_cmplrdata_t* [[TMP8]] to i32* -// CHECK2-NEXT: store i32 [[TMP4]], i32* [[TMP9]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 5 -// CHECK2-NEXT: store i64 0, i64* [[TMP10]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 6 -// CHECK2-NEXT: store i64 9, i64* [[TMP11]], align 8 -// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 7 -// CHECK2-NEXT: store i64 1, i64* [[TMP12]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 9 -// CHECK2-NEXT: [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i8* -// CHECK2-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP14]], i8 0, i64 8, i1 false) -// CHECK2-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP12]], align 8 -// CHECK2-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP5]], i32 1, i64* [[TMP10]], i64* [[TMP11]], i64 [[TMP15]], i32 1, i32 0, i64 0, i8* null) -// CHECK2-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 33, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK2-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to %struct.kmp_task_t_with_privates* +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP9]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP12:%.*]] = bitcast %union.kmp_cmplrdata_t* [[TMP11]] to i32* +// CHECK2-NEXT: store i32 [[TMP7]], i32* [[TMP12]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 5 +// CHECK2-NEXT: store i64 0, i64* [[TMP13]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 6 +// CHECK2-NEXT: store i64 9, i64* [[TMP14]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 7 +// CHECK2-NEXT: store i64 1, i64* [[TMP15]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 9 +// CHECK2-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i8* +// CHECK2-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP17]], i8 0, i64 8, i1 false) +// CHECK2-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP15]], align 8 +// CHECK2-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i8* [[TMP8]], i32 1, i64* [[TMP13]], i64* [[TMP14]], i64 [[TMP18]], i32 1, i32 0, i64 0, i8* null) +// CHECK2-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: // CHECK2-NEXT: ret void @@ -1024,7 +1049,7 @@ // CHECK2-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // CHECK2-NEXT: [[I_I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 @@ -1037,7 +1062,7 @@ // CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK2-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* // CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 // CHECK2-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP10]], align 8 @@ -1064,8 +1089,8 @@ // CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 // CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // CHECK2-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 -// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK2-NEXT: [[TMP20:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP20:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 // CHECK2-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP21]] to i32 // CHECK2-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 @@ -1088,47 +1113,51 @@ // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK2-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.1*)* @.omp_task_entry..4 to i32 (i32, i8*)*)) -// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates.1* -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], %struct.kmp_task_t_with_privates.1* [[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 5 -// CHECK2-NEXT: store i64 0, i64* [[TMP8]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 6 -// CHECK2-NEXT: store i64 9, i64* [[TMP9]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 7 -// CHECK2-NEXT: store i64 1, i64* [[TMP10]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 9 -// CHECK2-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i8* -// CHECK2-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP12]], i8 0, i64 8, i1 false) -// CHECK2-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP10]], align 8 -// CHECK2-NEXT: [[TMP14:%.*]] = zext i32 [[TMP4]] to i64 -// CHECK2-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP5]], i32 1, i64* [[TMP8]], i64* [[TMP9]], i64 [[TMP13]], i32 1, i32 1, i64 [[TMP14]], i8* null) -// CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 1, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.3*)* @.omp_task_entry..4 to i32 (i32, i8*)*)) +// CHECK2-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to %struct.kmp_task_t_with_privates.3* +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], %struct.kmp_task_t_with_privates.3* [[TMP9]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 5 +// CHECK2-NEXT: store i64 0, i64* [[TMP11]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 6 +// CHECK2-NEXT: store i64 9, i64* [[TMP12]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 7 +// CHECK2-NEXT: store i64 1, i64* [[TMP13]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 9 +// CHECK2-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i8* +// CHECK2-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP15]], i8 0, i64 8, i1 false) +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP13]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK2-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i8* [[TMP8]], i32 1, i64* [[TMP11]], i64* [[TMP12]], i64 [[TMP16]], i32 1, i32 1, i64 [[TMP17]], i8* null) +// CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_task_entry..4 -// CHECK2-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.1* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] { +// CHECK2-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.3* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 @@ -1140,21 +1169,21 @@ // CHECK2-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.2*, align 8 // CHECK2-NEXT: [[I_I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.1*, align 8 +// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.3*, align 8 // CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK2-NEXT: store %struct.kmp_task_t_with_privates.1* [[TMP1]], %struct.kmp_task_t_with_privates.1** [[DOTADDR1]], align 8 +// CHECK2-NEXT: store %struct.kmp_task_t_with_privates.3* [[TMP1]], %struct.kmp_task_t_with_privates.3** [[DOTADDR1]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.1*, %struct.kmp_task_t_with_privates.1** [[DOTADDR1]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], %struct.kmp_task_t_with_privates.1* [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.3*, %struct.kmp_task_t_with_privates.3** [[DOTADDR1]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], %struct.kmp_task_t_with_privates.3* [[TMP3]], i32 0, i32 0 // CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* -// CHECK2-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.1* [[TMP3]] to i8* +// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.2* +// CHECK2-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.3* [[TMP3]] to i8* // CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 // CHECK2-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP10]], align 8 // CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6 @@ -1180,8 +1209,8 @@ // CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !31 // CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !31 // CHECK2-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !31 -// CHECK2-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !31 -// CHECK2-NEXT: [[TMP20:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !31 +// CHECK2-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !31 +// CHECK2-NEXT: [[TMP20:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !31 // CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !31 // CHECK2-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP21]] to i32 // CHECK2-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !31 @@ -1204,103 +1233,107 @@ // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i8*** noundef nonnull align 8 dereferenceable(8) [[ARGV:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[ARGV_ADDR:%.*]] = alloca i8***, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 -// CHECK2-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: store i8*** [[ARGV]], i8**** [[ARGV_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], i64* [[DOTCAPTURE_EXPR__ADDR2]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i8***, i8**** [[ARGV_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK2-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK2-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i8***, i8**** [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP7]], align 8 +// CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 +// CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK2-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK2-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK2-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK2-NEXT: store i32* [[TMP0]], i32** [[TMP7]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK2-NEXT: store i32* [[TMP1]], i32** [[TMP8]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK2-NEXT: store i8*** [[TMP2]], i8**** [[TMP9]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV3]], align 4 -// CHECK2-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK2-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i8**, i8*** [[TMP2]], align 8 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP13]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK2-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, i8* [[TMP15]], i64 [[IDXPROM8]] -// CHECK2-NEXT: [[TMP17:%.*]] = load i8, i8* [[ARRAYIDX9]], align 1 -// CHECK2-NEXT: [[CONV10:%.*]] = sext i8 [[TMP17]] to i32 -// CHECK2-NEXT: store i32 [[CONV10]], i32* [[DOTCAPTURE_EXPR_7]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP18]], 0 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK2-NEXT: store i32* [[TMP2]], i32** [[TMP15]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK2-NEXT: store i32* [[TMP4]], i32** [[TMP16]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK2-NEXT: store i8*** [[TMP6]], i8**** [[TMP17]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK2-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK2-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i8**, i8*** [[TMP6]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP21]], i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK2-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP23]], i64 [[IDXPROM6]] +// CHECK2-NEXT: [[TMP25:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 +// CHECK2-NEXT: [[CONV:%.*]] = sext i8 [[TMP25]] to i32 +// CHECK2-NEXT: store i32 [[CONV]], i32* [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP26]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK2-NEXT: [[CONV12:%.*]] = sext i32 [[DIV]] to i64 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_7]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK2-NEXT: [[SUB13:%.*]] = sub i32 [[TMP19]], [[TMP20]] -// CHECK2-NEXT: [[SUB14:%.*]] = sub i32 [[SUB13]], 1 -// CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB14]], 1 -// CHECK2-NEXT: [[DIV15:%.*]] = udiv i32 [[ADD]], 1 -// CHECK2-NEXT: [[CONV16:%.*]] = zext i32 [[DIV15]] to i64 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV12]], [[CONV16]] -// CHECK2-NEXT: [[SUB17:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK2-NEXT: store i64 [[SUB17]], i64* [[DOTCAPTURE_EXPR_11]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 1, i64 88, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.3*)* @.omp_task_entry..7 to i32 (i32, i8*)*)) -// CHECK2-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP21]] to %struct.kmp_task_t_with_privates.3* -// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], %struct.kmp_task_t_with_privates.3* [[TMP22]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP23]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP25:%.*]] = load i8*, i8** [[TMP24]], align 8 -// CHECK2-NEXT: [[TMP26:%.*]] = bitcast %struct.anon.2* [[AGG_CAPTURED]] to i8* -// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP25]], i8* align 8 [[TMP26]], i64 24, i1 false) -// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], %struct.kmp_task_t_with_privates.3* [[TMP22]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP28:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP28]] to i1 -// CHECK2-NEXT: [[TMP29:%.*]] = sext i1 [[TOBOOL]] to i32 -// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP23]], i32 0, i32 5 -// CHECK2-NEXT: store i64 0, i64* [[TMP30]], align 8 -// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP23]], i32 0, i32 6 -// CHECK2-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_11]], align 8 -// CHECK2-NEXT: store i64 [[TMP32]], i64* [[TMP31]], align 8 -// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP23]], i32 0, i32 7 -// CHECK2-NEXT: store i64 1, i64* [[TMP33]], align 8 -// CHECK2-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP23]], i32 0, i32 9 -// CHECK2-NEXT: [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i8* -// CHECK2-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP35]], i8 0, i64 8, i1 false) -// CHECK2-NEXT: [[TMP36:%.*]] = load i64, i64* [[TMP33]], align 8 -// CHECK2-NEXT: [[TMP37:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i8* [[TMP21]], i32 [[TMP29]], i64* [[TMP30]], i64* [[TMP31]], i64 [[TMP36]], i32 1, i32 2, i64 [[TMP37]], i8* bitcast (void (%struct.kmp_task_t_with_privates.3*, %struct.kmp_task_t_with_privates.3*, i32)* @.omp_task_dup. to i8*)) -// CHECK2-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: [[CONV9:%.*]] = sext i32 [[DIV]] to i64 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP27]], [[TMP28]] +// CHECK2-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB11]], 1 +// CHECK2-NEXT: [[DIV12:%.*]] = udiv i32 [[ADD]], 1 +// CHECK2-NEXT: [[CONV13:%.*]] = zext i32 [[DIV12]] to i64 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV9]], [[CONV13]] +// CHECK2-NEXT: [[SUB14:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK2-NEXT: store i64 [[SUB14]], i64* [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[TMP29:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 1, i64 88, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.6*)* @.omp_task_entry..7 to i32 (i32, i8*)*)) +// CHECK2-NEXT: [[TMP30:%.*]] = bitcast i8* [[TMP29]] to %struct.kmp_task_t_with_privates.6* +// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], %struct.kmp_task_t_with_privates.6* [[TMP30]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP31]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP33:%.*]] = load i8*, i8** [[TMP32]], align 8 +// CHECK2-NEXT: [[TMP34:%.*]] = bitcast %struct.anon.5* [[AGG_CAPTURED]] to i8* +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP33]], i8* align 8 [[TMP34]], i64 24, i1 false) +// CHECK2-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6]], %struct.kmp_task_t_with_privates.6* [[TMP30]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP36:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-NEXT: [[TOBOOL15:%.*]] = trunc i8 [[TMP36]] to i1 +// CHECK2-NEXT: [[TMP37:%.*]] = sext i1 [[TOBOOL15]] to i32 +// CHECK2-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP31]], i32 0, i32 5 +// CHECK2-NEXT: store i64 0, i64* [[TMP38]], align 8 +// CHECK2-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP31]], i32 0, i32 6 +// CHECK2-NEXT: [[TMP40:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: store i64 [[TMP40]], i64* [[TMP39]], align 8 +// CHECK2-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP31]], i32 0, i32 7 +// CHECK2-NEXT: store i64 1, i64* [[TMP41]], align 8 +// CHECK2-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP31]], i32 0, i32 9 +// CHECK2-NEXT: [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i8* +// CHECK2-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP43]], i8 0, i64 8, i1 false) +// CHECK2-NEXT: [[TMP44:%.*]] = load i64, i64* [[TMP41]], align 8 +// CHECK2-NEXT: [[TMP45:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK2-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i8* [[TMP29]], i32 [[TMP37]], i64* [[TMP38]], i64* [[TMP39]], i64 [[TMP44]], i32 1, i32 2, i64 [[TMP45]], i8* bitcast (void (%struct.kmp_task_t_with_privates.6*, %struct.kmp_task_t_with_privates.6*, i32)* @.omp_task_dup. to i8*)) +// CHECK2-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: // CHECK2-NEXT: ret void @@ -1321,7 +1354,7 @@ // // // CHECK2-LABEL: define {{[^@]+}}@.omp_task_entry..7 -// CHECK2-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.3* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] { +// CHECK2-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.6* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 @@ -1333,7 +1366,7 @@ // CHECK2-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.5*, align 8 // CHECK2-NEXT: [[DOTLASTPRIV_PTR_ADDR_I:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTCAPTURE_EXPR__I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_2_I:%.*]] = alloca i32, align 4 @@ -1345,19 +1378,19 @@ // CHECK2-NEXT: [[J15_I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.3*, align 8 +// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.6*, align 8 // CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK2-NEXT: store %struct.kmp_task_t_with_privates.3* [[TMP1]], %struct.kmp_task_t_with_privates.3** [[DOTADDR1]], align 8 +// CHECK2-NEXT: store %struct.kmp_task_t_with_privates.6* [[TMP1]], %struct.kmp_task_t_with_privates.6** [[DOTADDR1]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.3*, %struct.kmp_task_t_with_privates.3** [[DOTADDR1]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], %struct.kmp_task_t_with_privates.3* [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.6*, %struct.kmp_task_t_with_privates.6** [[DOTADDR1]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], %struct.kmp_task_t_with_privates.6* [[TMP3]], i32 0, i32 0 // CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.2* -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], %struct.kmp_task_t_with_privates.3* [[TMP3]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.5* +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6]], %struct.kmp_task_t_with_privates.6* [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* -// CHECK2-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.3* [[TMP3]] to i8* +// CHECK2-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.6* [[TMP3]] to i8* // CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 // CHECK2-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8 // CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6 @@ -1383,33 +1416,33 @@ // CHECK2-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !47 // CHECK2-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !47 // CHECK2-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !47 -// CHECK2-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !47 -// CHECK2-NEXT: [[TMP22:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !47 +// CHECK2-NEXT: store %struct.anon.5* [[TMP8]], %struct.anon.5** [[__CONTEXT_ADDR_I]], align 8, !noalias !47 +// CHECK2-NEXT: [[TMP22:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR_I]], align 8, !noalias !47 // CHECK2-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !47 // CHECK2-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !47 // CHECK2-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, i32**)* // CHECK2-NEXT: call void [[TMP25]](i8* [[TMP24]], i32** [[DOTLASTPRIV_PTR_ADDR_I]]) #[[ATTR2]] -// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP22]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP22]], i32 0, i32 0 // CHECK2-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP26]], align 8 // CHECK2-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !47 -// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP22]], i32 0, i32 1 // CHECK2-NEXT: [[TMP30:%.*]] = load i32*, i32** [[TMP29]], align 8 // CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 // CHECK2-NEXT: store i32 [[TMP31]], i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !47 -// CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP22]], i32 0, i32 1 // CHECK2-NEXT: [[TMP33:%.*]] = load i32*, i32** [[TMP32]], align 8 // CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 // CHECK2-NEXT: store i32 [[TMP34]], i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47 -// CHECK2-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP22]], i32 0, i32 2 // CHECK2-NEXT: [[TMP36:%.*]] = load i8***, i8**** [[TMP35]], align 8 // CHECK2-NEXT: [[TMP37:%.*]] = load i8**, i8*** [[TMP36]], align 8 -// CHECK2-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP22]], i32 0, i32 1 // CHECK2-NEXT: [[TMP39:%.*]] = load i32*, i32** [[TMP38]], align 8 // CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 // CHECK2-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[TMP40]] to i64 // CHECK2-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i8*, i8** [[TMP37]], i64 [[IDXPROM_I]] // CHECK2-NEXT: [[TMP41:%.*]] = load i8*, i8** [[ARRAYIDX_I]], align 8 -// CHECK2-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP22]], i32 0, i32 1 // CHECK2-NEXT: [[TMP43:%.*]] = load i32*, i32** [[TMP42]], align 8 // CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 // CHECK2-NEXT: [[IDXPROM4_I:%.*]] = sext i32 [[TMP44]] to i64 @@ -1441,9 +1474,9 @@ // CHECK2: taskloop.if.then.i: // CHECK2-NEXT: [[TMP53:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !47 // CHECK2-NEXT: store i64 [[TMP53]], i64* [[DOTOMP_IV_I]], align 8, !noalias !47 -// CHECK2-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP22]], i32 0, i32 1 // CHECK2-NEXT: [[TMP55:%.*]] = load i32*, i32** [[TMP54]], align 8 -// CHECK2-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP22]], i32 0, i32 2 // CHECK2-NEXT: [[TMP57:%.*]] = load i8***, i8**** [[TMP56]], align 8 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK2: omp.inner.for.cond.i: @@ -1498,20 +1531,20 @@ // // // CHECK2-LABEL: define {{[^@]+}}@.omp_task_dup. -// CHECK2-SAME: (%struct.kmp_task_t_with_privates.3* noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.3* noundef [[TMP1:%.*]], i32 noundef [[TMP2:%.*]]) #[[ATTR4]] { +// CHECK2-SAME: (%struct.kmp_task_t_with_privates.6* noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.6* noundef [[TMP1:%.*]], i32 noundef [[TMP2:%.*]]) #[[ATTR4]] { // CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca %struct.kmp_task_t_with_privates.3*, align 8 -// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.3*, align 8 +// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca %struct.kmp_task_t_with_privates.6*, align 8 +// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.6*, align 8 // CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store %struct.kmp_task_t_with_privates.3* [[TMP0]], %struct.kmp_task_t_with_privates.3** [[DOTADDR]], align 8 -// CHECK2-NEXT: store %struct.kmp_task_t_with_privates.3* [[TMP1]], %struct.kmp_task_t_with_privates.3** [[DOTADDR1]], align 8 +// CHECK2-NEXT: store %struct.kmp_task_t_with_privates.6* [[TMP0]], %struct.kmp_task_t_with_privates.6** [[DOTADDR]], align 8 +// CHECK2-NEXT: store %struct.kmp_task_t_with_privates.6* [[TMP1]], %struct.kmp_task_t_with_privates.6** [[DOTADDR1]], align 8 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTADDR2]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.3*, %struct.kmp_task_t_with_privates.3** [[DOTADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], %struct.kmp_task_t_with_privates.3* [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.6*, %struct.kmp_task_t_with_privates.6** [[DOTADDR]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], %struct.kmp_task_t_with_privates.6* [[TMP3]], i32 0, i32 0 // CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 8 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTADDR2]], align 4 // CHECK2-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], %struct.kmp_task_t_with_privates.3* [[TMP3]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6]], %struct.kmp_task_t_with_privates.6* [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: ret void // // @@ -1521,7 +1554,7 @@ // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK2-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 // CHECK2-NEXT: store i32 [[C]], i32* [[C_ADDR]], align 4 // CHECK2-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 @@ -1529,89 +1562,96 @@ // CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK2-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK2-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK2-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP1]] to i1 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store %struct.S* [[THIS1]], %struct.S** [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store i32* [[C_ADDR]], i32** [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP4:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK2-NEXT: [[FROMBOOL3:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK2-NEXT: store i8 [[FROMBOOL3]], i8* [[CONV]], align 1 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S*, i32*, i64)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.S* [[THIS1]], i32* [[C_ADDR]], i64 [[TMP2]]) +// CHECK2-NEXT: store i8 [[FROMBOOL3]], i8* [[TMP3]], align 8 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S* noundef [[THIS:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 +// CHECK2-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 -// CHECK2-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK2-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK2-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK2-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 8 +// CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK2-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK2-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK2-NEXT: br i1 [[TMP10]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK2-NEXT: store %struct.S* [[TMP0]], %struct.S** [[TMP6]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK2-NEXT: store i32* [[TMP1]], i32** [[TMP7]], align 8 -// CHECK2-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK2-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 -// CHECK2-NEXT: store i32* [[TMP]], i32** [[_TMP1]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK2-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP11]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK2-NEXT: store i32* [[TMP4]], i32** [[TMP12]], align 8 +// CHECK2-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK2-NEXT: [[TMP13:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK2-NEXT: store i32* [[TMP]], i32** [[_TMP2]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK2-NEXT: store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP15]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK2-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = select i1 [[TOBOOL]], i32 2, i32 0 -// CHECK2-NEXT: [[TMP12:%.*]] = or i32 [[TMP11]], 1 -// CHECK2-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 [[TMP12]], i64 80, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.5*)* @.omp_task_entry..10 to i32 (i32, i8*)*)) -// CHECK2-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to %struct.kmp_task_t_with_privates.5* -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], %struct.kmp_task_t_with_privates.5* [[TMP14]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP15]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK2-NEXT: [[TMP18:%.*]] = bitcast %struct.anon.4* [[AGG_CAPTURED]] to i8* -// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 16, i1 false) -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP15]], i32 0, i32 5 -// CHECK2-NEXT: store i64 0, i64* [[TMP19]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP15]], i32 0, i32 6 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: [[CONV5:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK2-NEXT: store i64 [[CONV5]], i64* [[TMP20]], align 8 -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP15]], i32 0, i32 7 -// CHECK2-NEXT: store i64 1, i64* [[TMP22]], align 8 -// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP15]], i32 0, i32 9 -// CHECK2-NEXT: [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i8* -// CHECK2-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP24]], i8 0, i64 8, i1 false) -// CHECK2-NEXT: [[TMP25:%.*]] = load i64, i64* [[TMP22]], align 8 -// CHECK2-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i8* [[TMP13]], i32 1, i64* [[TMP19]], i64* [[TMP20]], i64 [[TMP25]], i32 1, i32 2, i64 4, i8* null) -// CHECK2-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK2-NEXT: [[SUB5:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB5]], i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = select i1 [[TOBOOL1]], i32 2, i32 0 +// CHECK2-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], 1 +// CHECK2-NEXT: [[TMP18:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 [[TMP17]], i64 80, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.9*)* @.omp_task_entry..10 to i32 (i32, i8*)*)) +// CHECK2-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP18]] to %struct.kmp_task_t_with_privates.9* +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_9:%.*]], %struct.kmp_task_t_with_privates.9* [[TMP19]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP21]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = bitcast %struct.anon.8* [[AGG_CAPTURED]] to i8* +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP22]], i8* align 8 [[TMP23]], i64 16, i1 false) +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 5 +// CHECK2-NEXT: store i64 0, i64* [[TMP24]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 6 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK2-NEXT: [[CONV:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK2-NEXT: store i64 [[CONV]], i64* [[TMP25]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 7 +// CHECK2-NEXT: store i64 1, i64* [[TMP27]], align 8 +// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 9 +// CHECK2-NEXT: [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i8* +// CHECK2-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP29]], i8 0, i64 8, i1 false) +// CHECK2-NEXT: [[TMP30:%.*]] = load i64, i64* [[TMP27]], align 8 +// CHECK2-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i8* [[TMP18]], i32 1, i64* [[TMP24]], i64* [[TMP25]], i64 [[TMP30]], i32 1, i32 2, i64 4, i8* null) +// CHECK2-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_task_entry..10 -// CHECK2-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.5* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] { +// CHECK2-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.9* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 @@ -1623,7 +1663,7 @@ // CHECK2-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.8*, align 8 // CHECK2-NEXT: [[TMP_I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP1_I:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTCAPTURE_EXPR__I:%.*]] = alloca i32, align 4 @@ -1634,17 +1674,17 @@ // CHECK2-NEXT: [[TMP6_I:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.5*, align 8 +// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.9*, align 8 // CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK2-NEXT: store %struct.kmp_task_t_with_privates.5* [[TMP1]], %struct.kmp_task_t_with_privates.5** [[DOTADDR1]], align 8 +// CHECK2-NEXT: store %struct.kmp_task_t_with_privates.9* [[TMP1]], %struct.kmp_task_t_with_privates.9** [[DOTADDR1]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.5*, %struct.kmp_task_t_with_privates.5** [[DOTADDR1]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], %struct.kmp_task_t_with_privates.5* [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.9*, %struct.kmp_task_t_with_privates.9** [[DOTADDR1]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_9:%.*]], %struct.kmp_task_t_with_privates.9* [[TMP3]], i32 0, i32 0 // CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.4* -// CHECK2-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.5* [[TMP3]] to i8* +// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.8* +// CHECK2-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.9* [[TMP3]] to i8* // CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 // CHECK2-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP10]], align 8 // CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6 @@ -1670,12 +1710,12 @@ // CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !62 // CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !62 // CHECK2-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !62 -// CHECK2-NEXT: store %struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !62 -// CHECK2-NEXT: [[TMP20:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !62 -// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP20]], i32 0, i32 0 +// CHECK2-NEXT: store %struct.anon.8* [[TMP8]], %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !62 +// CHECK2-NEXT: [[TMP20:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !62 +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP20]], i32 0, i32 0 // CHECK2-NEXT: [[TMP22:%.*]] = load %struct.S*, %struct.S** [[TMP21]], align 8 // CHECK2-NEXT: store i32* [[TMP_I]], i32** [[TMP1_I]], align 8, !noalias !62 -// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP20]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP20]], i32 0, i32 1 // CHECK2-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP23]], align 8 // CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 // CHECK2-NEXT: store i32 [[TMP25]], i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !62 @@ -1693,7 +1733,7 @@ // CHECK2-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !62 // CHECK2-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP29]] to i32 // CHECK2-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !62 -// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP20]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP20]], i32 0, i32 1 // CHECK2-NEXT: [[TMP31:%.*]] = load i32*, i32** [[TMP30]], align 8 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK2: omp.inner.for.cond.i: @@ -1750,14 +1790,13 @@ // CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED2:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i8, align 1 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED7:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED10:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i8, align 1 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_5:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) @@ -1766,91 +1805,97 @@ // CHECK3-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[CONV]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP3]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED2]] to i32* -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED2]], align 8 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP6]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_2]]) // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_4]], align 1 +// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_3]], align 1 // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_4]], align 1 -// CHECK3-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP9]] to i1 -// CHECK3-NEXT: [[CONV8:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED7]] to i8* -// CHECK3-NEXT: [[FROMBOOL9:%.*]] = zext i1 [[TOBOOL6]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL9]], i8* [[CONV8]], align 1 -// CHECK3-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED7]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK3-NEXT: [[CONV11:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED10]] to i32* -// CHECK3-NEXT: store i32 [[TMP11]], i32* [[CONV11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED10]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_4]], align 1 -// CHECK3-NEXT: [[TOBOOL12:%.*]] = trunc i8 [[TMP13]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL12]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[I]], i32** [[TMP9]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[ARGC_ADDR]], i32** [[TMP10]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 2 +// CHECK3-NEXT: store i8*** [[ARGV_ADDR]], i8**** [[TMP11]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP13:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_3]], align 1 +// CHECK3-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK3-NEXT: [[FROMBOOL7:%.*]] = zext i1 [[TOBOOL6]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL7]], i8* [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_3]], align 1 +// CHECK3-NEXT: [[TOBOOL8:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL8]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i8***, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* [[I]], i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]]) // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32* [[I]], i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR2:[0-9]+]] +// CHECK3-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]]) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK3-NEXT: ret i32 [[TMP14]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK3-NEXT: ret i32 [[TMP17]] // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK3-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK3-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 33, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates* -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 4 -// CHECK3-NEXT: [[TMP9:%.*]] = bitcast %union.kmp_cmplrdata_t* [[TMP8]] to i32* -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[TMP9]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 5 -// CHECK3-NEXT: store i64 0, i64* [[TMP10]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 6 -// CHECK3-NEXT: store i64 9, i64* [[TMP11]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 7 -// CHECK3-NEXT: store i64 1, i64* [[TMP12]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 9 -// CHECK3-NEXT: [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i8* -// CHECK3-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP14]], i8 0, i64 8, i1 false) -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP12]], align 8 -// CHECK3-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP5]], i32 1, i64* [[TMP10]], i64* [[TMP11]], i64 [[TMP15]], i32 1, i32 0, i64 0, i8* null) -// CHECK3-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 33, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK3-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to %struct.kmp_task_t_with_privates* +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP9]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP12:%.*]] = bitcast %union.kmp_cmplrdata_t* [[TMP11]] to i32* +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 5 +// CHECK3-NEXT: store i64 0, i64* [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 6 +// CHECK3-NEXT: store i64 9, i64* [[TMP14]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 7 +// CHECK3-NEXT: store i64 1, i64* [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 9 +// CHECK3-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i8* +// CHECK3-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP17]], i8 0, i64 8, i1 false) +// CHECK3-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP15]], align 8 +// CHECK3-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i8* [[TMP8]], i32 1, i64* [[TMP13]], i64* [[TMP14]], i64 [[TMP18]], i32 1, i32 0, i64 0, i8* null) +// CHECK3-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: ret void @@ -1869,7 +1914,7 @@ // CHECK3-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK3-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // CHECK3-NEXT: [[I_I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 @@ -1882,7 +1927,7 @@ // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK3-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 // CHECK3-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP10]], align 8 @@ -1909,8 +1954,8 @@ // CHECK3-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 // CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // CHECK3-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 -// CHECK3-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK3-NEXT: [[TMP20:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: [[TMP20:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK3-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 // CHECK3-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP21]] to i32 // CHECK3-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 @@ -1933,47 +1978,51 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK3-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK3-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.1*)* @.omp_task_entry..4 to i32 (i32, i8*)*)) -// CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates.1* -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], %struct.kmp_task_t_with_privates.1* [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 5 -// CHECK3-NEXT: store i64 0, i64* [[TMP8]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 6 -// CHECK3-NEXT: store i64 9, i64* [[TMP9]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 7 -// CHECK3-NEXT: store i64 1, i64* [[TMP10]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 9 -// CHECK3-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i8* -// CHECK3-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP12]], i8 0, i64 8, i1 false) -// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP10]], align 8 -// CHECK3-NEXT: [[TMP14:%.*]] = zext i32 [[TMP4]] to i64 -// CHECK3-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP5]], i32 1, i64* [[TMP8]], i64* [[TMP9]], i64 [[TMP13]], i32 1, i32 1, i64 [[TMP14]], i8* null) -// CHECK3-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 1, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.3*)* @.omp_task_entry..4 to i32 (i32, i8*)*)) +// CHECK3-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to %struct.kmp_task_t_with_privates.3* +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], %struct.kmp_task_t_with_privates.3* [[TMP9]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 5 +// CHECK3-NEXT: store i64 0, i64* [[TMP11]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 6 +// CHECK3-NEXT: store i64 9, i64* [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 7 +// CHECK3-NEXT: store i64 1, i64* [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 9 +// CHECK3-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i8* +// CHECK3-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP15]], i8 0, i64 8, i1 false) +// CHECK3-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK3-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i8* [[TMP8]], i32 1, i64* [[TMP11]], i64* [[TMP12]], i64 [[TMP16]], i32 1, i32 1, i64 [[TMP17]], i8* null) +// CHECK3-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_task_entry..4 -// CHECK3-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.1* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.3* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 @@ -1985,21 +2034,21 @@ // CHECK3-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK3-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.2*, align 8 // CHECK3-NEXT: [[I_I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.1*, align 8 +// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.3*, align 8 // CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK3-NEXT: store %struct.kmp_task_t_with_privates.1* [[TMP1]], %struct.kmp_task_t_with_privates.1** [[DOTADDR1]], align 8 +// CHECK3-NEXT: store %struct.kmp_task_t_with_privates.3* [[TMP1]], %struct.kmp_task_t_with_privates.3** [[DOTADDR1]], align 8 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.1*, %struct.kmp_task_t_with_privates.1** [[DOTADDR1]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], %struct.kmp_task_t_with_privates.1* [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.3*, %struct.kmp_task_t_with_privates.3** [[DOTADDR1]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], %struct.kmp_task_t_with_privates.3* [[TMP3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* -// CHECK3-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.1* [[TMP3]] to i8* +// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.2* +// CHECK3-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.3* [[TMP3]] to i8* // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 // CHECK3-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP10]], align 8 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6 @@ -2025,8 +2074,8 @@ // CHECK3-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !31 // CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !31 // CHECK3-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !31 -// CHECK3-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !31 -// CHECK3-NEXT: [[TMP20:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !31 +// CHECK3-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !31 +// CHECK3-NEXT: [[TMP20:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !31 // CHECK3-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !31 // CHECK3-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP21]] to i32 // CHECK3-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !31 @@ -2049,108 +2098,112 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i8*** noundef nonnull align 8 dereferenceable(8) [[ARGV:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[ARGV_ADDR:%.*]] = alloca i8***, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca i64, align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 -// CHECK3-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: store i8*** [[ARGV]], i8**** [[ARGV_ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], i64* [[DOTCAPTURE_EXPR__ADDR2]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8***, i8**** [[ARGV_ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK3-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK3-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] -// CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK3-NEXT: store i32* [[TMP0]], i32** [[TMP7]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK3-NEXT: store i32* [[TMP1]], i32** [[TMP8]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK3-NEXT: store i8*** [[TMP2]], i8**** [[TMP9]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 +// CHECK3-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8***, i8**** [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP7]], align 8 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[TMP10]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV3]], align 4 -// CHECK3-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i8**, i8*** [[TMP2]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP15]], i64 [[IDXPROM]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, i8* [[TMP17]], i64 [[IDXPROM8]] -// CHECK3-NEXT: [[TMP19:%.*]] = load i8, i8* [[ARRAYIDX9]], align 1 -// CHECK3-NEXT: [[CONV10:%.*]] = sext i8 [[TMP19]] to i32 -// CHECK3-NEXT: store i32 [[CONV10]], i32* [[DOTCAPTURE_EXPR_7]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP20]], 0 +// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK3-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK3: omp_if.then: +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[TMP2]], i32** [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[TMP4]], i32** [[TMP16]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK3-NEXT: store i8*** [[TMP6]], i8**** [[TMP17]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[AGG_CAPTURED]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP19:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP19]] to i1 +// CHECK3-NEXT: [[FROMBOOL3:%.*]] = zext i1 [[TOBOOL2]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL3]], i8* [[TMP18]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i8**, i8*** [[TMP6]], align 8 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP23]], i64 [[IDXPROM]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, i8* [[TMP25]], i64 [[IDXPROM8]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i8, i8* [[ARRAYIDX9]], align 1 +// CHECK3-NEXT: [[CONV:%.*]] = sext i8 [[TMP27]] to i32 +// CHECK3-NEXT: store i32 [[CONV]], i32* [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK3-NEXT: [[CONV12:%.*]] = sext i32 [[DIV]] to i64 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_7]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK3-NEXT: [[SUB13:%.*]] = sub i32 [[TMP21]], [[TMP22]] -// CHECK3-NEXT: [[SUB14:%.*]] = sub i32 [[SUB13]], 1 -// CHECK3-NEXT: [[ADD:%.*]] = add i32 [[SUB14]], 1 -// CHECK3-NEXT: [[DIV15:%.*]] = udiv i32 [[ADD]], 1 -// CHECK3-NEXT: [[CONV16:%.*]] = zext i32 [[DIV15]] to i64 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV12]], [[CONV16]] -// CHECK3-NEXT: [[SUB17:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK3-NEXT: store i64 [[SUB17]], i64* [[DOTCAPTURE_EXPR_11]], align 8 -// CHECK3-NEXT: [[TMP23:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 1, i64 88, i64 32, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.3*)* @.omp_task_entry..7 to i32 (i32, i8*)*)) -// CHECK3-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP23]] to %struct.kmp_task_t_with_privates.3* -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], %struct.kmp_task_t_with_privates.3* [[TMP24]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP25]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP27:%.*]] = load i8*, i8** [[TMP26]], align 8 -// CHECK3-NEXT: [[TMP28:%.*]] = bitcast %struct.anon.2* [[AGG_CAPTURED]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP27]], i8* align 8 [[TMP28]], i64 32, i1 false) -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], %struct.kmp_task_t_with_privates.3* [[TMP24]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP30:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK3-NEXT: [[TOBOOL18:%.*]] = trunc i8 [[TMP30]] to i1 -// CHECK3-NEXT: [[TMP31:%.*]] = sext i1 [[TOBOOL18]] to i32 -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP25]], i32 0, i32 5 -// CHECK3-NEXT: store i64 0, i64* [[TMP32]], align 8 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP25]], i32 0, i32 6 -// CHECK3-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_11]], align 8 -// CHECK3-NEXT: store i64 [[TMP34]], i64* [[TMP33]], align 8 -// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP25]], i32 0, i32 7 -// CHECK3-NEXT: store i64 1, i64* [[TMP35]], align 8 -// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP25]], i32 0, i32 9 -// CHECK3-NEXT: [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i8* -// CHECK3-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP37]], i8 0, i64 8, i1 false) -// CHECK3-NEXT: [[TMP38:%.*]] = load i64, i64* [[TMP35]], align 8 -// CHECK3-NEXT: [[TMP39:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK3-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i8* [[TMP23]], i32 [[TMP31]], i64* [[TMP32]], i64* [[TMP33]], i64 [[TMP38]], i32 1, i32 2, i64 [[TMP39]], i8* bitcast (void (%struct.kmp_task_t_with_privates.3*, %struct.kmp_task_t_with_privates.3*, i32)* @.omp_task_dup. to i8*)) -// CHECK3-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[CONV11:%.*]] = sext i32 [[DIV]] to i64 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK3-NEXT: [[SUB12:%.*]] = sub i32 [[TMP29]], [[TMP30]] +// CHECK3-NEXT: [[SUB13:%.*]] = sub i32 [[SUB12]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add i32 [[SUB13]], 1 +// CHECK3-NEXT: [[DIV14:%.*]] = udiv i32 [[ADD]], 1 +// CHECK3-NEXT: [[CONV15:%.*]] = zext i32 [[DIV14]] to i64 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV11]], [[CONV15]] +// CHECK3-NEXT: [[SUB16:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK3-NEXT: store i64 [[SUB16]], i64* [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK3-NEXT: [[TMP31:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 1, i64 88, i64 32, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.6*)* @.omp_task_entry..7 to i32 (i32, i8*)*)) +// CHECK3-NEXT: [[TMP32:%.*]] = bitcast i8* [[TMP31]] to %struct.kmp_task_t_with_privates.6* +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], %struct.kmp_task_t_with_privates.6* [[TMP32]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP33]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP35:%.*]] = load i8*, i8** [[TMP34]], align 8 +// CHECK3-NEXT: [[TMP36:%.*]] = bitcast %struct.anon.5* [[AGG_CAPTURED]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP35]], i8* align 8 [[TMP36]], i64 32, i1 false) +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6]], %struct.kmp_task_t_with_privates.6* [[TMP32]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP38:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL17:%.*]] = trunc i8 [[TMP38]] to i1 +// CHECK3-NEXT: [[TMP39:%.*]] = sext i1 [[TOBOOL17]] to i32 +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP33]], i32 0, i32 5 +// CHECK3-NEXT: store i64 0, i64* [[TMP40]], align 8 +// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP33]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK3-NEXT: store i64 [[TMP42]], i64* [[TMP41]], align 8 +// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP33]], i32 0, i32 7 +// CHECK3-NEXT: store i64 1, i64* [[TMP43]], align 8 +// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP33]], i32 0, i32 9 +// CHECK3-NEXT: [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i8* +// CHECK3-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP45]], i8 0, i64 8, i1 false) +// CHECK3-NEXT: [[TMP46:%.*]] = load i64, i64* [[TMP43]], align 8 +// CHECK3-NEXT: [[TMP47:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK3-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i8* [[TMP31]], i32 [[TMP39]], i64* [[TMP40]], i64* [[TMP41]], i64 [[TMP46]], i32 1, i32 2, i64 [[TMP47]], i8* bitcast (void (%struct.kmp_task_t_with_privates.6*, %struct.kmp_task_t_with_privates.6*, i32)* @.omp_task_dup. to i8*)) +// CHECK3-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: ret void @@ -2171,7 +2224,7 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_task_entry..7 -// CHECK3-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.3* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.6* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 @@ -2183,7 +2236,7 @@ // CHECK3-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK3-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.5*, align 8 // CHECK3-NEXT: [[DOTLASTPRIV_PTR_ADDR_I:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_2_I:%.*]] = alloca i32, align 4 @@ -2195,19 +2248,19 @@ // CHECK3-NEXT: [[J15_I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.3*, align 8 +// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.6*, align 8 // CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK3-NEXT: store %struct.kmp_task_t_with_privates.3* [[TMP1]], %struct.kmp_task_t_with_privates.3** [[DOTADDR1]], align 8 +// CHECK3-NEXT: store %struct.kmp_task_t_with_privates.6* [[TMP1]], %struct.kmp_task_t_with_privates.6** [[DOTADDR1]], align 8 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.3*, %struct.kmp_task_t_with_privates.3** [[DOTADDR1]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], %struct.kmp_task_t_with_privates.3* [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.6*, %struct.kmp_task_t_with_privates.6** [[DOTADDR1]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], %struct.kmp_task_t_with_privates.6* [[TMP3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.2* -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], %struct.kmp_task_t_with_privates.3* [[TMP3]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.5* +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6]], %struct.kmp_task_t_with_privates.6* [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* -// CHECK3-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.3* [[TMP3]] to i8* +// CHECK3-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.6* [[TMP3]] to i8* // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 // CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8 // CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6 @@ -2233,33 +2286,33 @@ // CHECK3-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !47 // CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !47 // CHECK3-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !47 -// CHECK3-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !47 -// CHECK3-NEXT: [[TMP22:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !47 +// CHECK3-NEXT: store %struct.anon.5* [[TMP8]], %struct.anon.5** [[__CONTEXT_ADDR_I]], align 8, !noalias !47 +// CHECK3-NEXT: [[TMP22:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR_I]], align 8, !noalias !47 // CHECK3-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !47 // CHECK3-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !47 // CHECK3-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, i32**)* // CHECK3-NEXT: call void [[TMP25]](i8* [[TMP24]], i32** [[DOTLASTPRIV_PTR_ADDR_I]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP22]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP22]], i32 0, i32 0 // CHECK3-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP26]], align 8 // CHECK3-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !47 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP22]], i32 0, i32 1 // CHECK3-NEXT: [[TMP30:%.*]] = load i32*, i32** [[TMP29]], align 8 // CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 // CHECK3-NEXT: store i32 [[TMP31]], i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !47 -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP22]], i32 0, i32 1 // CHECK3-NEXT: [[TMP33:%.*]] = load i32*, i32** [[TMP32]], align 8 // CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 // CHECK3-NEXT: store i32 [[TMP34]], i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47 -// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP22]], i32 0, i32 2 // CHECK3-NEXT: [[TMP36:%.*]] = load i8***, i8**** [[TMP35]], align 8 // CHECK3-NEXT: [[TMP37:%.*]] = load i8**, i8*** [[TMP36]], align 8 -// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP22]], i32 0, i32 1 // CHECK3-NEXT: [[TMP39:%.*]] = load i32*, i32** [[TMP38]], align 8 // CHECK3-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 // CHECK3-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[TMP40]] to i64 // CHECK3-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i8*, i8** [[TMP37]], i64 [[IDXPROM_I]] // CHECK3-NEXT: [[TMP41:%.*]] = load i8*, i8** [[ARRAYIDX_I]], align 8 -// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP22]], i32 0, i32 1 // CHECK3-NEXT: [[TMP43:%.*]] = load i32*, i32** [[TMP42]], align 8 // CHECK3-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 // CHECK3-NEXT: [[IDXPROM4_I:%.*]] = sext i32 [[TMP44]] to i64 @@ -2291,11 +2344,11 @@ // CHECK3: taskloop.if.then.i: // CHECK3-NEXT: [[TMP53:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !47 // CHECK3-NEXT: store i64 [[TMP53]], i64* [[DOTOMP_IV_I]], align 8, !noalias !47 -// CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP22]], i32 0, i32 1 // CHECK3-NEXT: [[TMP55:%.*]] = load i32*, i32** [[TMP54]], align 8 -// CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP22]], i32 0, i32 2 // CHECK3-NEXT: [[TMP57:%.*]] = load i8***, i8**** [[TMP56]], align 8 -// CHECK3-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP22]], i32 0, i32 3 // CHECK3-NEXT: [[TMP59:%.*]] = load i8, i8* [[TMP58]], align 1 // CHECK3-NEXT: [[TOBOOL_I:%.*]] = trunc i8 [[TMP59]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL_I]], label [[OMP_IF_THEN_I:%.*]], label [[OMP_IF_ELSE_I:%.*]] @@ -2398,20 +2451,20 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_task_dup. -// CHECK3-SAME: (%struct.kmp_task_t_with_privates.3* noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.3* noundef [[TMP1:%.*]], i32 noundef [[TMP2:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (%struct.kmp_task_t_with_privates.6* noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.6* noundef [[TMP1:%.*]], i32 noundef [[TMP2:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca %struct.kmp_task_t_with_privates.3*, align 8 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.3*, align 8 +// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca %struct.kmp_task_t_with_privates.6*, align 8 +// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.6*, align 8 // CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store %struct.kmp_task_t_with_privates.3* [[TMP0]], %struct.kmp_task_t_with_privates.3** [[DOTADDR]], align 8 -// CHECK3-NEXT: store %struct.kmp_task_t_with_privates.3* [[TMP1]], %struct.kmp_task_t_with_privates.3** [[DOTADDR1]], align 8 +// CHECK3-NEXT: store %struct.kmp_task_t_with_privates.6* [[TMP0]], %struct.kmp_task_t_with_privates.6** [[DOTADDR]], align 8 +// CHECK3-NEXT: store %struct.kmp_task_t_with_privates.6* [[TMP1]], %struct.kmp_task_t_with_privates.6** [[DOTADDR1]], align 8 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.3*, %struct.kmp_task_t_with_privates.3** [[DOTADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], %struct.kmp_task_t_with_privates.3* [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.6*, %struct.kmp_task_t_with_privates.6** [[DOTADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], %struct.kmp_task_t_with_privates.6* [[TMP3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 8 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTADDR2]], align 4 // CHECK3-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], %struct.kmp_task_t_with_privates.3* [[TMP3]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6]], %struct.kmp_task_t_with_privates.6* [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: ret void // // @@ -2441,7 +2494,7 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK3-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 // CHECK3-NEXT: store i32 [[C]], i32* [[C_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 @@ -2449,89 +2502,96 @@ // CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP1]] to i1 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.S* [[THIS1]], %struct.S** [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[C_ADDR]], i32** [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP4:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK3-NEXT: [[FROMBOOL3:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL3]], i8* [[CONV]], align 1 -// CHECK3-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S*, i32*, i64)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.S* [[THIS1]], i32* [[C_ADDR]], i64 [[TMP2]]) +// CHECK3-NEXT: store i8 [[FROMBOOL3]], i8* [[TMP3]], align 8 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S* noundef [[THIS:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 -// CHECK3-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[C_ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK3-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK3-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 8 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK3-NEXT: br i1 [[TMP10]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK3-NEXT: store %struct.S* [[TMP0]], %struct.S** [[TMP6]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK3-NEXT: store i32* [[TMP1]], i32** [[TMP7]], align 8 -// CHECK3-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 -// CHECK3-NEXT: store i32* [[TMP]], i32** [[_TMP1]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP11]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[TMP4]], i32** [[TMP12]], align 8 +// CHECK3-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP13:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK3-NEXT: store i32* [[TMP]], i32** [[_TMP2]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP15]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK3-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK3-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = select i1 [[TOBOOL]], i32 2, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = or i32 [[TMP11]], 1 -// CHECK3-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 [[TMP12]], i64 80, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.5*)* @.omp_task_entry..10 to i32 (i32, i8*)*)) -// CHECK3-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to %struct.kmp_task_t_with_privates.5* -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], %struct.kmp_task_t_with_privates.5* [[TMP14]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP15]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = bitcast %struct.anon.4* [[AGG_CAPTURED]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 16, i1 false) -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP15]], i32 0, i32 5 -// CHECK3-NEXT: store i64 0, i64* [[TMP19]], align 8 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP15]], i32 0, i32 6 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK3-NEXT: [[CONV5:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK3-NEXT: store i64 [[CONV5]], i64* [[TMP20]], align 8 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP15]], i32 0, i32 7 -// CHECK3-NEXT: store i64 1, i64* [[TMP22]], align 8 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP15]], i32 0, i32 9 -// CHECK3-NEXT: [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i8* -// CHECK3-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP24]], i8 0, i64 8, i1 false) -// CHECK3-NEXT: [[TMP25:%.*]] = load i64, i64* [[TMP22]], align 8 -// CHECK3-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i8* [[TMP13]], i32 1, i64* [[TMP19]], i64* [[TMP20]], i64 [[TMP25]], i32 1, i32 2, i64 4, i8* null) -// CHECK3-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK3-NEXT: [[SUB5:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK3-NEXT: store i32 [[SUB5]], i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = select i1 [[TOBOOL1]], i32 2, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 [[TMP17]], i64 80, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.10*)* @.omp_task_entry..10 to i32 (i32, i8*)*)) +// CHECK3-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP18]] to %struct.kmp_task_t_with_privates.10* +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_10:%.*]], %struct.kmp_task_t_with_privates.10* [[TMP19]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP21]], align 8 +// CHECK3-NEXT: [[TMP23:%.*]] = bitcast %struct.anon.9* [[AGG_CAPTURED]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP22]], i8* align 8 [[TMP23]], i64 16, i1 false) +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 5 +// CHECK3-NEXT: store i64 0, i64* [[TMP24]], align 8 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK3-NEXT: store i64 [[CONV]], i64* [[TMP25]], align 8 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 7 +// CHECK3-NEXT: store i64 1, i64* [[TMP27]], align 8 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 9 +// CHECK3-NEXT: [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i8* +// CHECK3-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP29]], i8 0, i64 8, i1 false) +// CHECK3-NEXT: [[TMP30:%.*]] = load i64, i64* [[TMP27]], align 8 +// CHECK3-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i8* [[TMP18]], i32 1, i64* [[TMP24]], i64* [[TMP25]], i64 [[TMP30]], i32 1, i32 2, i64 4, i8* null) +// CHECK3-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_task_entry..10 -// CHECK3-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.5* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.10* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 @@ -2543,7 +2603,7 @@ // CHECK3-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK3-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.9*, align 8 // CHECK3-NEXT: [[TMP_I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP1_I:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__I:%.*]] = alloca i32, align 4 @@ -2554,17 +2614,17 @@ // CHECK3-NEXT: [[TMP6_I:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.5*, align 8 +// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.10*, align 8 // CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK3-NEXT: store %struct.kmp_task_t_with_privates.5* [[TMP1]], %struct.kmp_task_t_with_privates.5** [[DOTADDR1]], align 8 +// CHECK3-NEXT: store %struct.kmp_task_t_with_privates.10* [[TMP1]], %struct.kmp_task_t_with_privates.10** [[DOTADDR1]], align 8 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.5*, %struct.kmp_task_t_with_privates.5** [[DOTADDR1]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], %struct.kmp_task_t_with_privates.5* [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.10*, %struct.kmp_task_t_with_privates.10** [[DOTADDR1]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_10:%.*]], %struct.kmp_task_t_with_privates.10* [[TMP3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.4* -// CHECK3-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.5* [[TMP3]] to i8* +// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.9* +// CHECK3-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.10* [[TMP3]] to i8* // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 // CHECK3-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP10]], align 8 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6 @@ -2590,12 +2650,12 @@ // CHECK3-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !64 // CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !64 // CHECK3-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !64 -// CHECK3-NEXT: store %struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !64 -// CHECK3-NEXT: [[TMP20:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !64 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP20]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.anon.9* [[TMP8]], %struct.anon.9** [[__CONTEXT_ADDR_I]], align 8, !noalias !64 +// CHECK3-NEXT: [[TMP20:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR_I]], align 8, !noalias !64 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP20]], i32 0, i32 0 // CHECK3-NEXT: [[TMP22:%.*]] = load %struct.S*, %struct.S** [[TMP21]], align 8 // CHECK3-NEXT: store i32* [[TMP_I]], i32** [[TMP1_I]], align 8, !noalias !64 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP20]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP20]], i32 0, i32 1 // CHECK3-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP23]], align 8 // CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 // CHECK3-NEXT: store i32 [[TMP25]], i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !64 @@ -2613,7 +2673,7 @@ // CHECK3-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !64 // CHECK3-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP29]] to i32 // CHECK3-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !64 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP20]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP20]], i32 0, i32 1 // CHECK3-NEXT: [[TMP31:%.*]] = load i32*, i32** [[TMP30]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK3: omp.inner.for.cond.i: diff --git a/clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp --- a/clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp @@ -207,6 +207,7 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 16 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIdEC1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]]) // CHECK1-NEXT: call void @_ZN1SIdEC1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TEST]]) @@ -218,15 +219,23 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYINIT_ELEMENT]], double noundef 2.000000e+00) // CHECK1-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR]], double noundef 3.000000e+00) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32*, [2 x %struct.S]*, %struct.S*)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[VEC]], i32* [[T_VAR]], [2 x %struct.S]* [[S_ARR]], %struct.S* [[VAR]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[TMP4]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK1-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] @@ -234,8 +243,8 @@ // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TEST]]) #[[ATTR4]] // CHECK1-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP2]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP6]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIdEC1Ev @@ -262,53 +271,52 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 8 dereferenceable(16) [[S_ARR:%.*]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 -// CHECK1-NEXT: br i1 [[TMP7]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK1-NEXT: br i1 [[TMP12]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[TMP1]], i32** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: store [2 x %struct.S]* [[TMP2]], [2 x %struct.S]** [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK1-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 4 -// CHECK1-NEXT: store i32* @_ZZ4mainE5sivar, i32** [[TMP12]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 9, i64 120, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to %struct.kmp_task_t_with_privates* -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP14]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP15]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP17]], i8* align 8 [[TMP18]], i64 40, i1 false) -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP14]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP19]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP20]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store [2 x i32]* [[TMP2]], [2 x i32]** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[TMP4]], i32** [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x %struct.S]* [[TMP6]], [2 x %struct.S]** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 3 +// CHECK1-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 4 +// CHECK1-NEXT: store i32* @_ZZ4mainE5sivar, i32** [[TMP17]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP18:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 9, i64 120, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP18]] to %struct.kmp_task_t_with_privates* +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP19]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP22]], i8* align 8 [[TMP23]], i64 40, i1 false) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP19]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP24]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP25]], i32 0, i32 0 // CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: @@ -318,24 +326,24 @@ // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP19]], i32 0, i32 1 -// CHECK1-NEXT: call void @_ZN1SIdEC1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP21]]) -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP15]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast %union.kmp_cmplrdata_t* [[TMP22]] to i32 (i32, i8*)** -// CHECK1-NEXT: store i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_destructor. to i32 (i32, i8*)*), i32 (i32, i8*)** [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP15]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, i64* [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP15]], i32 0, i32 6 -// CHECK1-NEXT: store i64 9, i64* [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP15]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, i64* [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP15]], i32 0, i32 9 -// CHECK1-NEXT: [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i8* -// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP28]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP29:%.*]] = load i64, i64* [[TMP26]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i8* [[TMP13]], i32 1, i64* [[TMP24]], i64* [[TMP25]], i64 [[TMP29]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates*, i32)* @.omp_task_dup. to i8*)) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP24]], i32 0, i32 1 +// CHECK1-NEXT: call void @_ZN1SIdEC1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP26]]) +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP28:%.*]] = bitcast %union.kmp_cmplrdata_t* [[TMP27]] to i32 (i32, i8*)** +// CHECK1-NEXT: store i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_destructor. to i32 (i32, i8*)*), i32 (i32, i8*)** [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, i64* [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 6 +// CHECK1-NEXT: store i64 9, i64* [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, i64* [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 9 +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i8* +// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP33]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP34:%.*]] = load i64, i64* [[TMP31]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i8* [[TMP18]], i32 1, i64* [[TMP29]], i64* [[TMP30]], i64 [[TMP34]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates*, i32)* @.omp_task_dup. to i8*)) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -388,7 +396,7 @@ // CHECK1-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTLASTPRIV_PTR_ADDR_I:%.*]] = alloca %struct.S*, align 8 // CHECK1-NEXT: [[DOTLASTPRIV_PTR_ADDR1_I:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTLASTPRIV_PTR_ADDR2_I:%.*]] = alloca [2 x %struct.S]*, align 8 @@ -406,7 +414,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -435,25 +443,25 @@ // CHECK1-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // CHECK1-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, %struct.S**, i32**, [2 x %struct.S]**, [2 x i32]**, i32**)* // CHECK1-NEXT: call void [[TMP25]](i8* [[TMP24]], %struct.S** [[DOTLASTPRIV_PTR_ADDR_I]], i32** [[DOTLASTPRIV_PTR_ADDR1_I]], [2 x %struct.S]** [[DOTLASTPRIV_PTR_ADDR2_I]], [2 x i32]** [[DOTLASTPRIV_PTR_ADDR3_I]], i32** [[DOTLASTPRIV_PTR_ADDR4_I]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP22]], i32 0, i32 3 // CHECK1-NEXT: [[TMP27:%.*]] = load %struct.S*, %struct.S** [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP22]], i32 0, i32 1 // CHECK1-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP22]], i32 0, i32 2 // CHECK1-NEXT: [[TMP31:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP22]], i32 0, i32 0 // CHECK1-NEXT: [[TMP33:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP22]], i32 0, i32 2 // CHECK1-NEXT: [[TMP35:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP34]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP22]], i32 0, i32 3 // CHECK1-NEXT: [[TMP37:%.*]] = load %struct.S*, %struct.S** [[TMP36]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP22]], i32 0, i32 4 // CHECK1-NEXT: [[TMP39:%.*]] = load i32*, i32** [[TMP38]], align 8 // CHECK1-NEXT: [[TMP40:%.*]] = load %struct.S*, %struct.S** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP41:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 @@ -592,39 +600,48 @@ // CHECK1-SAME: () #[[ATTR9:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TTT]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 128 // CHECK1-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [2 x i32]* [[VEC]], i32* [[T_VAR]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[VAR]]) +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[TMP4]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP2]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TTT]]) #[[ATTR4]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP6]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIdEC2Ev @@ -662,137 +679,136 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 -// CHECK1-NEXT: br i1 [[TMP7]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK1-NEXT: br i1 [[TMP12]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[TMP1]], i32** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: store [2 x %struct.S.0]* [[TMP2]], [2 x %struct.S.0]** [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK1-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP11]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP12:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 9, i64 256, i64 32, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.2*)* @.omp_task_entry..5 to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to %struct.kmp_task_t_with_privates.2* -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP13]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP14]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 128 -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast %struct.anon.1* [[AGG_CAPTURED]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP16]], i8* align 8 [[TMP17]], i64 32, i1 false) -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], %struct.kmp_task_t_with_privates.2* [[TMP13]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP18]], i32 0, i32 2 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP19]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store [2 x i32]* [[TMP2]], [2 x i32]** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[TMP4]], i32** [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x %struct.S.1]* [[TMP6]], [2 x %struct.S.1]** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[AGG_CAPTURED]], i32 0, i32 3 +// CHECK1-NEXT: store %struct.S.1* [[TMP8]], %struct.S.1** [[TMP16]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP17:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 9, i64 256, i64 32, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.4*)* @.omp_task_entry..5 to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to %struct.kmp_task_t_with_privates.4* +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], %struct.kmp_task_t_with_privates.4* [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP19]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 128 +// CHECK1-NEXT: [[TMP22:%.*]] = bitcast %struct.anon.3* [[AGG_CAPTURED]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 32, i1 false) +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4]], %struct.kmp_task_t_with_privates.4* [[TMP18]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5:%.*]], %struct..kmp_privates.t.5* [[TMP23]], i32 0, i32 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP24]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: -// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[OMP_IF_THEN]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[OMP_IF_THEN]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP18]], i32 0, i32 3 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP20]]) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP14]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP22:%.*]] = bitcast %union.kmp_cmplrdata_t* [[TMP21]] to i32 (i32, i8*)** -// CHECK1-NEXT: store i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.2*)* @.omp_task_destructor..7 to i32 (i32, i8*)*), i32 (i32, i8*)** [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP14]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, i64* [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP14]], i32 0, i32 6 -// CHECK1-NEXT: store i64 9, i64* [[TMP24]], align 16 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP14]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, i64* [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP14]], i32 0, i32 9 -// CHECK1-NEXT: [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i8* -// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP27]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP28:%.*]] = load i64, i64* [[TMP25]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i8* [[TMP12]], i32 1, i64* [[TMP23]], i64* [[TMP24]], i64 [[TMP28]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2*, i32)* @.omp_task_dup..6 to i8*)) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], %struct..kmp_privates.t.5* [[TMP23]], i32 0, i32 3 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP25]]) +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP19]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP27:%.*]] = bitcast %union.kmp_cmplrdata_t* [[TMP26]] to i32 (i32, i8*)** +// CHECK1-NEXT: store i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.4*)* @.omp_task_destructor..7 to i32 (i32, i8*)*), i32 (i32, i8*)** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP19]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, i64* [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP19]], i32 0, i32 6 +// CHECK1-NEXT: store i64 9, i64* [[TMP29]], align 16 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP19]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, i64* [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP19]], i32 0, i32 9 +// CHECK1-NEXT: [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i8* +// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP32]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP33:%.*]] = load i64, i64* [[TMP30]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i8* [[TMP17]], i32 1, i64* [[TMP28]], i64* [[TMP29]], i64 [[TMP33]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates.4*, %struct.kmp_task_t_with_privates.4*, i32)* @.omp_task_dup..6 to i8*)) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_privates_map..4 -// CHECK1-SAME: (%struct..kmp_privates.t.3* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]], [2 x i32]** noalias noundef [[TMP2:%.*]], [2 x %struct.S.0]** noalias noundef [[TMP3:%.*]], %struct.S.0** noalias noundef [[TMP4:%.*]]) #[[ATTR6]] { +// CHECK1-SAME: (%struct..kmp_privates.t.5* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]], [2 x i32]** noalias noundef [[TMP2:%.*]], [2 x %struct.S.1]** noalias noundef [[TMP3:%.*]], %struct.S.1** noalias noundef [[TMP4:%.*]]) #[[ATTR6]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.3*, align 8 +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.5*, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32**, align 8 // CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca [2 x i32]**, align 8 -// CHECK1-NEXT: [[DOTADDR3:%.*]] = alloca [2 x %struct.S.0]**, align 8 -// CHECK1-NEXT: [[DOTADDR4:%.*]] = alloca %struct.S.0**, align 8 -// CHECK1-NEXT: store %struct..kmp_privates.t.3* [[TMP0]], %struct..kmp_privates.t.3** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[DOTADDR3:%.*]] = alloca [2 x %struct.S.1]**, align 8 +// CHECK1-NEXT: [[DOTADDR4:%.*]] = alloca %struct.S.1**, align 8 +// CHECK1-NEXT: store %struct..kmp_privates.t.5* [[TMP0]], %struct..kmp_privates.t.5** [[DOTADDR]], align 8 // CHECK1-NEXT: store i32** [[TMP1]], i32*** [[DOTADDR1]], align 8 // CHECK1-NEXT: store [2 x i32]** [[TMP2]], [2 x i32]*** [[DOTADDR2]], align 8 -// CHECK1-NEXT: store [2 x %struct.S.0]** [[TMP3]], [2 x %struct.S.0]*** [[DOTADDR3]], align 8 -// CHECK1-NEXT: store %struct.S.0** [[TMP4]], %struct.S.0*** [[DOTADDR4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load %struct..kmp_privates.t.3*, %struct..kmp_privates.t.3** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP5]], i32 0, i32 0 +// CHECK1-NEXT: store [2 x %struct.S.1]** [[TMP3]], [2 x %struct.S.1]*** [[DOTADDR3]], align 8 +// CHECK1-NEXT: store %struct.S.1** [[TMP4]], %struct.S.1*** [[DOTADDR4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load %struct..kmp_privates.t.5*, %struct..kmp_privates.t.5** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5:%.*]], %struct..kmp_privates.t.5* [[TMP5]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i32**, i32*** [[DOTADDR1]], align 8 // CHECK1-NEXT: store i32* [[TMP6]], i32** [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP5]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], %struct..kmp_privates.t.5* [[TMP5]], i32 0, i32 1 // CHECK1-NEXT: [[TMP9:%.*]] = load [2 x i32]**, [2 x i32]*** [[DOTADDR2]], align 8 // CHECK1-NEXT: store [2 x i32]* [[TMP8]], [2 x i32]** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP5]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP11:%.*]] = load [2 x %struct.S.0]**, [2 x %struct.S.0]*** [[DOTADDR3]], align 8 -// CHECK1-NEXT: store [2 x %struct.S.0]* [[TMP10]], [2 x %struct.S.0]** [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP5]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP13:%.*]] = load %struct.S.0**, %struct.S.0*** [[DOTADDR4]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP12]], %struct.S.0** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], %struct..kmp_privates.t.5* [[TMP5]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP11:%.*]] = load [2 x %struct.S.1]**, [2 x %struct.S.1]*** [[DOTADDR3]], align 8 +// CHECK1-NEXT: store [2 x %struct.S.1]* [[TMP10]], [2 x %struct.S.1]** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], %struct..kmp_privates.t.5* [[TMP5]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP13:%.*]] = load %struct.S.1**, %struct.S.1*** [[DOTADDR4]], align 8 +// CHECK1-NEXT: store %struct.S.1* [[TMP12]], %struct.S.1** [[TMP13]], align 8 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..5 -// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.2* noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.4* noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 @@ -804,27 +820,27 @@ // CHECK1-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTLASTPRIV_PTR_ADDR_I:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTLASTPRIV_PTR_ADDR1_I:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[DOTLASTPRIV_PTR_ADDR2_I:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK1-NEXT: [[DOTLASTPRIV_PTR_ADDR3_I:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[DOTLASTPRIV_PTR_ADDR2_I:%.*]] = alloca [2 x %struct.S.1]*, align 8 +// CHECK1-NEXT: [[DOTLASTPRIV_PTR_ADDR3_I:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[I_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.4*, align 8 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP1]], %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 +// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.4* [[TMP1]], %struct.kmp_task_t_with_privates.4** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.4*, %struct.kmp_task_t_with_privates.4** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], %struct.kmp_task_t_with_privates.4* [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 128 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.1* -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.3* [[TMP9]] to i8* -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.2* [[TMP3]] to i8* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.3* +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4]], %struct.kmp_task_t_with_privates.4* [[TMP3]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.5* [[TMP9]] to i8* +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.4* [[TMP3]] to i8* // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 // CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8 // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6 @@ -843,35 +859,35 @@ // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !32 // CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !32 // CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !32 -// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.3*, i32**, [2 x i32]**, [2 x %struct.S.0]**, %struct.S.0**)* @.omp_task_privates_map..4 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !32 +// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.5*, i32**, [2 x i32]**, [2 x %struct.S.1]**, %struct.S.1**)* @.omp_task_privates_map..4 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !32 // CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !32 // CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !32 // CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !32 // CHECK1-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !32 // CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !32 // CHECK1-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !32 -// CHECK1-NEXT: store %struct.anon.1* [[TMP8]], %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !32 -// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !32 +// CHECK1-NEXT: store %struct.anon.3* [[TMP8]], %struct.anon.3** [[__CONTEXT_ADDR_I]], align 8, !noalias !32 +// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR_I]], align 8, !noalias !32 // CHECK1-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !32 // CHECK1-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !32 -// CHECK1-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, i32**, [2 x i32]**, [2 x %struct.S.0]**, %struct.S.0**)* -// CHECK1-NEXT: call void [[TMP25]](i8* [[TMP24]], i32** [[DOTLASTPRIV_PTR_ADDR_I]], [2 x i32]** [[DOTLASTPRIV_PTR_ADDR1_I]], [2 x %struct.S.0]** [[DOTLASTPRIV_PTR_ADDR2_I]], %struct.S.0** [[DOTLASTPRIV_PTR_ADDR3_I]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP22]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, i32**, [2 x i32]**, [2 x %struct.S.1]**, %struct.S.1**)* +// CHECK1-NEXT: call void [[TMP25]](i8* [[TMP24]], i32** [[DOTLASTPRIV_PTR_ADDR_I]], [2 x i32]** [[DOTLASTPRIV_PTR_ADDR1_I]], [2 x %struct.S.1]** [[DOTLASTPRIV_PTR_ADDR2_I]], %struct.S.1** [[DOTLASTPRIV_PTR_ADDR3_I]]) #[[ATTR4]] +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP22]], i32 0, i32 1 // CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP22]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP22]], i32 0, i32 0 // CHECK1-NEXT: [[TMP29:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP22]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP31:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP22]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP33:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP22]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP35:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP34]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP22]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP37:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP22]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP31:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP22]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP33:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP22]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP35:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP22]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP37:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP36]], align 8 // CHECK1-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !32 // CHECK1-NEXT: [[TMP39:%.*]] = load [2 x i32]*, [2 x i32]** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !32 -// CHECK1-NEXT: [[TMP40:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[DOTLASTPRIV_PTR_ADDR2_I]], align 8, !noalias !32 -// CHECK1-NEXT: [[TMP41:%.*]] = load %struct.S.0*, %struct.S.0** [[DOTLASTPRIV_PTR_ADDR3_I]], align 8, !noalias !32 +// CHECK1-NEXT: [[TMP40:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[DOTLASTPRIV_PTR_ADDR2_I]], align 8, !noalias !32 +// CHECK1-NEXT: [[TMP41:%.*]] = load %struct.S.1*, %struct.S.1** [[DOTLASTPRIV_PTR_ADDR3_I]], align 8, !noalias !32 // CHECK1-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !32 // CHECK1-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP42]] to i32 // CHECK1-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !32 @@ -888,9 +904,9 @@ // CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP38]], align 128, !llvm.access.group !33 // CHECK1-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP39]], i64 0, i64 0 // CHECK1-NEXT: store i32 [[TMP46]], i32* [[ARRAYIDX_I]], align 4, !llvm.access.group !33 -// CHECK1-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP40]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP47:%.*]] = bitcast %struct.S.0* [[ARRAYIDX5_I]] to i8* -// CHECK1-NEXT: [[TMP48:%.*]] = bitcast %struct.S.0* [[TMP41]] to i8* +// CHECK1-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP40]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP47:%.*]] = bitcast %struct.S.1* [[ARRAYIDX5_I]] to i8* +// CHECK1-NEXT: [[TMP48:%.*]] = bitcast %struct.S.1* [[TMP41]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP47]], i8* align 4 [[TMP48]], i64 4, i1 false) #[[ATTR4]], !llvm.access.group !33 // CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !32, !llvm.access.group !33 // CHECK1-NEXT: [[ADD6_I:%.*]] = add nsw i32 [[TMP49]], 1 @@ -906,23 +922,23 @@ // CHECK1-NEXT: [[TMP53:%.*]] = bitcast [2 x i32]* [[TMP29]] to i8* // CHECK1-NEXT: [[TMP54:%.*]] = bitcast [2 x i32]* [[TMP39]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP53]], i8* align 4 [[TMP54]], i64 8, i1 false) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN_I:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP31]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP55:%.*]] = bitcast [2 x %struct.S.0]* [[TMP40]] to %struct.S.0* -// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN_I]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN_I:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP31]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP55:%.*]] = bitcast [2 x %struct.S.1]* [[TMP40]] to %struct.S.1* +// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr [[STRUCT_S_1:%.*]], %struct.S.1* [[ARRAY_BEGIN_I]], i64 2 // CHECK1-NEXT: br label [[OMP_ARRAYCPY_BODY_I:%.*]] // CHECK1: omp.arraycpy.body.i: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST_I:%.*]] = phi %struct.S.0* [ [[TMP55]], [[DOTOMP_LASTPRIVATE_THEN_I]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT_I:%.*]], [[OMP_ARRAYCPY_BODY_I]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST_I:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN_I]], [[DOTOMP_LASTPRIVATE_THEN_I]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT_I:%.*]], [[OMP_ARRAYCPY_BODY_I]] ] -// CHECK1-NEXT: [[TMP57:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST_I]] to i8* -// CHECK1-NEXT: [[TMP58:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST_I]] to i8* +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST_I:%.*]] = phi %struct.S.1* [ [[TMP55]], [[DOTOMP_LASTPRIVATE_THEN_I]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT_I:%.*]], [[OMP_ARRAYCPY_BODY_I]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST_I:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN_I]], [[DOTOMP_LASTPRIVATE_THEN_I]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT_I:%.*]], [[OMP_ARRAYCPY_BODY_I]] ] +// CHECK1-NEXT: [[TMP57:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST_I]] to i8* +// CHECK1-NEXT: [[TMP58:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST_I]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP57]], i8* align 4 [[TMP58]], i64 4, i1 false) #[[ATTR4]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT_I]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST_I]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT_I]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST_I]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE_I:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT_I]], [[TMP56]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT_I]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST_I]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT_I]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST_I]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE_I:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT_I]], [[TMP56]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE_I]], label [[OMP_ARRAYCPY_DONE7_I:%.*]], label [[OMP_ARRAYCPY_BODY_I]] // CHECK1: omp.arraycpy.done7.i: -// CHECK1-NEXT: [[TMP59:%.*]] = bitcast %struct.S.0* [[TMP35]] to i8* -// CHECK1-NEXT: [[TMP60:%.*]] = bitcast %struct.S.0* [[TMP41]] to i8* +// CHECK1-NEXT: [[TMP59:%.*]] = bitcast %struct.S.1* [[TMP35]] to i8* +// CHECK1-NEXT: [[TMP60:%.*]] = bitcast %struct.S.1* [[TMP41]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP59]], i8* align 4 [[TMP60]], i64 4, i1 false) #[[ATTR4]] // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__3_EXIT]] // CHECK1: .omp_outlined..3.exit: @@ -930,57 +946,57 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_dup..6 -// CHECK1-SAME: (%struct.kmp_task_t_with_privates.2* noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.2* noundef [[TMP1:%.*]], i32 noundef [[TMP2:%.*]]) #[[ATTR7]] { +// CHECK1-SAME: (%struct.kmp_task_t_with_privates.4* noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.4* noundef [[TMP1:%.*]], i32 noundef [[TMP2:%.*]]) #[[ATTR7]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8 +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca %struct.kmp_task_t_with_privates.4*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.4*, align 8 // CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP0]], %struct.kmp_task_t_with_privates.2** [[DOTADDR]], align 8 -// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP1]], %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 +// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.4* [[TMP0]], %struct.kmp_task_t_with_privates.4** [[DOTADDR]], align 8 +// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.4* [[TMP1]], %struct.kmp_task_t_with_privates.4** [[DOTADDR1]], align 8 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTADDR2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.4*, %struct.kmp_task_t_with_privates.4** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], %struct.kmp_task_t_with_privates.4* [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 8 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTADDR2]], align 4 // CHECK1-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 64 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP7]], i32 0, i32 2 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP8]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4]], %struct.kmp_task_t_with_privates.4* [[TMP3]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5:%.*]], %struct..kmp_privates.t.5* [[TMP7]], i32 0, i32 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP8]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: -// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP7]], i32 0, i32 3 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP9]]) +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], %struct..kmp_privates.t.5* [[TMP7]], i32 0, i32 3 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP9]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_destructor..7 -// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.2* noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.4* noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.4*, align 8 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP1]], %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP2]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP3]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP3]], i32 0, i32 3 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP5]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.4* [[TMP1]], %struct.kmp_task_t_with_privates.4** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.kmp_task_t_with_privates.4*, %struct.kmp_task_t_with_privates.4** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], %struct.kmp_task_t_with_privates.4* [[TMP2]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5:%.*]], %struct..kmp_privates.t.5* [[TMP3]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], %struct..kmp_privates.t.5* [[TMP3]], i32 0, i32 3 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP5]]) #[[ATTR4]] +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done2: // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[RETVAL]], align 4 @@ -988,46 +1004,46 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, i32* [[F]], align 4 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -1042,46 +1058,49 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK3-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK3-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK3-NEXT: store double* @g, double** [[TMP4]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK3-NEXT: store i32* @_ZZ4mainE5sivar, i32** [[TMP5]], align 8 -// CHECK3-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 96, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK3-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to %struct.kmp_task_t_with_privates* -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP7]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 [[TMP11]], i64 16, i1 false) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP7]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 5 -// CHECK3-NEXT: store i64 0, i64* [[TMP13]], align 8 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 6 -// CHECK3-NEXT: store i64 9, i64* [[TMP14]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 7 -// CHECK3-NEXT: store i64 1, i64* [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 9 -// CHECK3-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i8* -// CHECK3-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP17]], i8 0, i64 8, i1 false) -// CHECK3-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP15]], align 8 -// CHECK3-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP6]], i32 1, i64* [[TMP13]], i64* [[TMP14]], i64 [[TMP18]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates*, i32)* @.omp_task_dup. to i8*)) -// CHECK3-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK3-NEXT: store double* @g, double** [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK3-NEXT: store i32* @_ZZ4mainE5sivar, i32** [[TMP6]], align 8 +// CHECK3-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP7:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i64 96, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.kmp_task_t_with_privates* +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP8]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP9]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i8*, i8** [[TMP10]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false) +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP8]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP9]], i32 0, i32 5 +// CHECK3-NEXT: store i64 0, i64* [[TMP14]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP9]], i32 0, i32 6 +// CHECK3-NEXT: store i64 9, i64* [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP9]], i32 0, i32 7 +// CHECK3-NEXT: store i64 1, i64* [[TMP16]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP9]], i32 0, i32 9 +// CHECK3-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i8* +// CHECK3-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP18]], i8 0, i64 8, i1 false) +// CHECK3-NEXT: [[TMP19:%.*]] = load i64, i64* [[TMP16]], align 8 +// CHECK3-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* [[TMP7]], i32 1, i64* [[TMP14]], i64* [[TMP15]], i64 [[TMP19]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates*, i32)* @.omp_task_dup. to i8*)) +// CHECK3-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: ret void @@ -1119,12 +1138,12 @@ // CHECK3-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK3-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // CHECK3-NEXT: [[DOTLASTPRIV_PTR_ADDR_I:%.*]] = alloca double*, align 8 // CHECK3-NEXT: [[DOTLASTPRIV_PTR_ADDR1_I:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[I_I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP_I:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK3-NEXT: [[REF_TMP_I:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8 // CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 @@ -1135,7 +1154,7 @@ // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK3-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -1164,15 +1183,15 @@ // CHECK3-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 // CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // CHECK3-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 -// CHECK3-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK3-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: [[TMP22:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK3-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK3-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK3-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, double**, i32**)* // CHECK3-NEXT: call void [[TMP25]](i8* [[TMP24]], double** [[DOTLASTPRIV_PTR_ADDR_I]], i32** [[DOTLASTPRIV_PTR_ADDR1_I]]) #[[ATTR3:[0-9]+]] -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP22]], i32 0, i32 0 // CHECK3-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP26]], align 8 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP22]], i32 0, i32 1 // CHECK3-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP28]], align 8 // CHECK3-NEXT: [[TMP30:%.*]] = load double*, double** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 // CHECK3-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 @@ -1191,11 +1210,11 @@ // CHECK3-NEXT: store i32 [[TMP35]], i32* [[I_I]], align 4, !noalias !14, !llvm.access.group !15 // CHECK3-NEXT: store double 1.000000e+00, double* [[TMP30]], align 8, !llvm.access.group !15 // CHECK3-NEXT: store i32 11, i32* [[TMP31]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP_I]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP_I]], i32 0, i32 0 // CHECK3-NEXT: store double* [[TMP30]], double** [[TMP36]], align 8, !noalias !14, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP_I]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP_I]], i32 0, i32 1 // CHECK3-NEXT: store i32* [[TMP31]], i32** [[TMP37]], align 8, !noalias !14, !llvm.access.group !15 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(16) [[REF_TMP_I]]) #[[ATTR3]], !llvm.access.group !15 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 8 dereferenceable(16) [[REF_TMP_I]]) #[[ATTR3]], !llvm.access.group !15 // CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 // CHECK3-NEXT: [[ADD3_I:%.*]] = add nsw i32 [[TMP38]], 1 // CHECK3-NEXT: store i32 [[ADD3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 @@ -1248,54 +1267,58 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>*, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* // CHECK4-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>** [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK4-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK4-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK4-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK4-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK4-NEXT: store double* @g, double** [[TMP4]], align 8 -// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK4-NEXT: store i32* @_ZZ4mainE5sivar, i32** [[TMP5]], align 8 -// CHECK4-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 96, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK4-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to %struct.kmp_task_t_with_privates* -// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP7]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 -// CHECK4-NEXT: [[TMP11:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 [[TMP11]], i64 16, i1 false) -// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP7]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 5 -// CHECK4-NEXT: store i64 0, i64* [[TMP13]], align 8 -// CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 6 -// CHECK4-NEXT: store i64 9, i64* [[TMP14]], align 8 -// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 7 -// CHECK4-NEXT: store i64 1, i64* [[TMP15]], align 8 -// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 9 -// CHECK4-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i8* -// CHECK4-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP17]], i8 0, i64 8, i1 false) -// CHECK4-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP15]], align 8 -// CHECK4-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP6]], i32 1, i64* [[TMP13]], i64* [[TMP14]], i64 [[TMP18]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates*, i32)* @.omp_task_dup. to i8*)) -// CHECK4-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK4-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK4-NEXT: store double* @g, double** [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK4-NEXT: store i32* @_ZZ4mainE5sivar, i32** [[TMP6]], align 8 +// CHECK4-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: [[TMP7:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i64 96, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK4-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.kmp_task_t_with_privates* +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP8]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP9]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP11:%.*]] = load i8*, i8** [[TMP10]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8* +// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP11]], i8* align 8 [[TMP12]], i64 16, i1 false) +// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP8]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP9]], i32 0, i32 5 +// CHECK4-NEXT: store i64 0, i64* [[TMP14]], align 8 +// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP9]], i32 0, i32 6 +// CHECK4-NEXT: store i64 9, i64* [[TMP15]], align 8 +// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP9]], i32 0, i32 7 +// CHECK4-NEXT: store i64 1, i64* [[TMP16]], align 8 +// CHECK4-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP9]], i32 0, i32 9 +// CHECK4-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i8* +// CHECK4-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP18]], i8 0, i64 8, i1 false) +// CHECK4-NEXT: [[TMP19:%.*]] = load i64, i64* [[TMP16]], align 8 +// CHECK4-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* [[TMP7]], i32 1, i64* [[TMP14]], i64* [[TMP15]], i64 [[TMP19]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates*, i32)* @.omp_task_dup. to i8*)) +// CHECK4-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: // CHECK4-NEXT: ret void @@ -1348,7 +1371,7 @@ // CHECK4-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK4-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK4-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // CHECK4-NEXT: [[DOTLASTPRIV_PTR_ADDR_I:%.*]] = alloca double*, align 8 // CHECK4-NEXT: [[DOTLASTPRIV_PTR_ADDR1_I:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[I_I:%.*]] = alloca i32, align 4 @@ -1364,7 +1387,7 @@ // CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK4-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK4-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK4-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK4-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK4-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -1393,15 +1416,15 @@ // CHECK4-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 // CHECK4-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // CHECK4-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 -// CHECK4-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK4-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: [[TMP22:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK4-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK4-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK4-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, double**, i32**)* // CHECK4-NEXT: call void [[TMP25]](i8* [[TMP24]], double** [[DOTLASTPRIV_PTR_ADDR_I]], i32** [[DOTLASTPRIV_PTR_ADDR1_I]]) #[[ATTR4:[0-9]+]] -// CHECK4-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP22]], i32 0, i32 0 // CHECK4-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP26]], align 8 -// CHECK4-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP22]], i32 0, i32 1 // CHECK4-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP28]], align 8 // CHECK4-NEXT: [[TMP30:%.*]] = load double*, double** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 // CHECK4-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 @@ -1485,67 +1508,74 @@ // CHECK5-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 // CHECK5-NEXT: [[S_ADDR:%.*]] = alloca %struct.St*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK5-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK5-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK5-NEXT: store %struct.St* [[S]], %struct.St** [[S_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK5-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, float**, %struct.St**)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]], float** [[A_ADDR]], %struct.St** [[S_ADDR]]) +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i64 [[TMP1]], i64* [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store float** [[A_ADDR]], float*** [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store %struct.St** [[S_ADDR]], %struct.St*** [[TMP4]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], float** noundef nonnull align 8 dereferenceable(8) [[A:%.*]], %struct.St** noundef nonnull align 8 dereferenceable(8) [[S:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca float**, align 8 -// CHECK5-NEXT: [[S_ADDR:%.*]] = alloca %struct.St**, align 8 -// CHECK5-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK5-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK5-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 -// CHECK5-NEXT: store %struct.St** [[S]], %struct.St*** [[S_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load float**, float*** [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.St**, %struct.St*** [[S_ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK5-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK5-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load float**, float*** [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load %struct.St**, %struct.St*** [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK5-NEXT: br i1 [[TMP10]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK5: omp_if.then: -// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK5-NEXT: store i64 [[TMP0]], i64* [[TMP7]], align 8 -// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK5-NEXT: store float** [[TMP1]], float*** [[TMP8]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK5-NEXT: store %struct.St** [[TMP2]], %struct.St*** [[TMP9]], align 8 -// CHECK5-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP10:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 1, i64 96, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK5-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to %struct.kmp_task_t_with_privates* -// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP11]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP12]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 8 -// CHECK5-NEXT: [[TMP15:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 24, i1 false) -// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP11]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP12]], i32 0, i32 5 -// CHECK5-NEXT: store i64 0, i64* [[TMP17]], align 8 -// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP12]], i32 0, i32 6 -// CHECK5-NEXT: store i64 9, i64* [[TMP18]], align 8 -// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP12]], i32 0, i32 7 -// CHECK5-NEXT: store i64 1, i64* [[TMP19]], align 8 -// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP12]], i32 0, i32 9 -// CHECK5-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i8* -// CHECK5-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP21]], i8 0, i64 8, i1 false) -// CHECK5-NEXT: [[TMP22:%.*]] = load i64, i64* [[TMP19]], align 8 -// CHECK5-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i8* [[TMP10]], i32 1, i64* [[TMP17]], i64* [[TMP18]], i64 [[TMP22]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates*, i32)* @.omp_task_dup. to i8*)) -// CHECK5-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK5-NEXT: store i64 [[TMP2]], i64* [[TMP11]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK5-NEXT: store float** [[TMP4]], float*** [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK5-NEXT: store %struct.St** [[TMP6]], %struct.St*** [[TMP13]], align 8 +// CHECK5-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP14:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 1, i64 96, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK5-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to %struct.kmp_task_t_with_privates* +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP15]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP16]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP18:%.*]] = load i8*, i8** [[TMP17]], align 8 +// CHECK5-NEXT: [[TMP19:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP18]], i8* align 8 [[TMP19]], i64 24, i1 false) +// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP15]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP16]], i32 0, i32 5 +// CHECK5-NEXT: store i64 0, i64* [[TMP21]], align 8 +// CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP16]], i32 0, i32 6 +// CHECK5-NEXT: store i64 9, i64* [[TMP22]], align 8 +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP16]], i32 0, i32 7 +// CHECK5-NEXT: store i64 1, i64* [[TMP23]], align 8 +// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP16]], i32 0, i32 9 +// CHECK5-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i8* +// CHECK5-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP25]], i8 0, i64 8, i1 false) +// CHECK5-NEXT: [[TMP26:%.*]] = load i64, i64* [[TMP23]], align 8 +// CHECK5-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i8* [[TMP14]], i32 1, i64* [[TMP21]], i64* [[TMP22]], i64 [[TMP26]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates*, i32)* @.omp_task_dup. to i8*)) +// CHECK5-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: br label [[OMP_IF_END]] // CHECK5: omp_if.end: // CHECK5-NEXT: ret void @@ -1583,7 +1613,7 @@ // CHECK5-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK5-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // CHECK5-NEXT: [[DOTLASTPRIV_PTR_ADDR_I:%.*]] = alloca float**, align 8 // CHECK5-NEXT: [[DOTLASTPRIV_PTR_ADDR1_I:%.*]] = alloca %struct.St**, align 8 // CHECK5-NEXT: [[I_I:%.*]] = alloca i32, align 4 @@ -1598,7 +1628,7 @@ // CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK5-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK5-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK5-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK5-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK5-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -1627,17 +1657,17 @@ // CHECK5-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 // CHECK5-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // CHECK5-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 -// CHECK5-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK5-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 0 +// CHECK5-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK5-NEXT: [[TMP22:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP22]], i32 0, i32 0 // CHECK5-NEXT: [[TMP24:%.*]] = load i64, i64* [[TMP23]], align 8 // CHECK5-NEXT: [[TMP25:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK5-NEXT: [[TMP26:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK5-NEXT: [[TMP27:%.*]] = bitcast void (i8*, ...)* [[TMP25]] to void (i8*, float***, %struct.St***)* // CHECK5-NEXT: call void [[TMP27]](i8* [[TMP26]], float*** [[DOTLASTPRIV_PTR_ADDR_I]], %struct.St*** [[DOTLASTPRIV_PTR_ADDR1_I]]) #[[ATTR2:[0-9]+]] -// CHECK5-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP22]], i32 0, i32 1 // CHECK5-NEXT: [[TMP29:%.*]] = load float**, float*** [[TMP28]], align 8 -// CHECK5-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP22]], i32 0, i32 2 // CHECK5-NEXT: [[TMP31:%.*]] = load %struct.St**, %struct.St*** [[TMP30]], align 8 // CHECK5-NEXT: [[TMP32:%.*]] = load float**, float*** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 // CHECK5-NEXT: [[TMP33:%.*]] = load %struct.St**, %struct.St*** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 @@ -1695,56 +1725,62 @@ // CHECK6-NEXT: entry: // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[I]], i32* [[J]]) +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store i32* [[I]], i32** [[TMP0]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store i32* [[J]], i32** [[TMP1]], align 8 +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[J:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK6-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 -// CHECK6-NEXT: [[J_ADDR:%.*]] = alloca i32*, align 8 -// CHECK6-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK6-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK6-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 -// CHECK6-NEXT: store i32* [[J]], i32** [[J_ADDR]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32*, i32** [[J_ADDR]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK6-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK6-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK6-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK6-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK6-NEXT: br i1 [[TMP8]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK6: omp_if.then: -// CHECK6-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK6-NEXT: store i32* [[TMP0]], i32** [[TMP6]], align 8 -// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK6-NEXT: store i32* [[TMP1]], i32** [[TMP7]], align 8 -// CHECK6-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK6-NEXT: [[TMP8:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i64 80, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK6-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to %struct.kmp_task_t_with_privates* -// CHECK6-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP9]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP12:%.*]] = load i8*, i8** [[TMP11]], align 8 -// CHECK6-NEXT: [[TMP13:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP12]], i8* align 8 [[TMP13]], i64 16, i1 false) -// CHECK6-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 5 -// CHECK6-NEXT: store i64 0, i64* [[TMP14]], align 8 -// CHECK6-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 6 -// CHECK6-NEXT: store i64 9, i64* [[TMP15]], align 8 -// CHECK6-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 7 -// CHECK6-NEXT: store i64 1, i64* [[TMP16]], align 8 -// CHECK6-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 9 -// CHECK6-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i8* -// CHECK6-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP18]], i8 0, i64 8, i1 false) -// CHECK6-NEXT: [[TMP19:%.*]] = load i64, i64* [[TMP16]], align 8 -// CHECK6-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i8* [[TMP8]], i32 1, i64* [[TMP14]], i64* [[TMP15]], i64 [[TMP19]], i32 1, i32 0, i64 0, i8* null) -// CHECK6-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK6-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK6-NEXT: store i32* [[TMP2]], i32** [[TMP9]], align 8 +// CHECK6-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK6-NEXT: store i32* [[TMP4]], i32** [[TMP10]], align 8 +// CHECK6-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK6-NEXT: [[TMP11:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 1, i64 80, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK6-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP11]] to %struct.kmp_task_t_with_privates* +// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP12]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP13]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP15:%.*]] = load i8*, i8** [[TMP14]], align 8 +// CHECK6-NEXT: [[TMP16:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8* +// CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP15]], i8* align 8 [[TMP16]], i64 16, i1 false) +// CHECK6-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP13]], i32 0, i32 5 +// CHECK6-NEXT: store i64 0, i64* [[TMP17]], align 8 +// CHECK6-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP13]], i32 0, i32 6 +// CHECK6-NEXT: store i64 9, i64* [[TMP18]], align 8 +// CHECK6-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP13]], i32 0, i32 7 +// CHECK6-NEXT: store i64 1, i64* [[TMP19]], align 8 +// CHECK6-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP13]], i32 0, i32 9 +// CHECK6-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i8* +// CHECK6-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP21]], i8 0, i64 8, i1 false) +// CHECK6-NEXT: [[TMP22:%.*]] = load i64, i64* [[TMP19]], align 8 +// CHECK6-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i8* [[TMP11]], i32 1, i64* [[TMP17]], i64* [[TMP18]], i64 [[TMP22]], i32 1, i32 0, i64 0, i8* null) +// CHECK6-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK6-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) // CHECK6-NEXT: br label [[OMP_IF_END]] // CHECK6: omp_if.end: // CHECK6-NEXT: ret void @@ -1763,7 +1799,7 @@ // CHECK6-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK6-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK6-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK6-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // CHECK6-NEXT: [[DOTLINEAR_START_I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTLINEAR_START1_I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I_I:%.*]] = alloca i32, align 4 @@ -1779,7 +1815,7 @@ // CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK6-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK6-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK6-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK6-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK6-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* // CHECK6-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 // CHECK6-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP10]], align 8 @@ -1806,17 +1842,17 @@ // CHECK6-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 // CHECK6-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // CHECK6-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 -// CHECK6-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK6-NEXT: [[TMP20:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK6-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP20]], i32 0, i32 0 +// CHECK6-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK6-NEXT: [[TMP20:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK6-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP20]], i32 0, i32 0 // CHECK6-NEXT: [[TMP22:%.*]] = load i32*, i32** [[TMP21]], align 8 // CHECK6-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 // CHECK6-NEXT: store i32 [[TMP23]], i32* [[DOTLINEAR_START_I]], align 4, !noalias !14 -// CHECK6-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP20]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP20]], i32 0, i32 1 // CHECK6-NEXT: [[TMP25:%.*]] = load i32*, i32** [[TMP24]], align 8 // CHECK6-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 // CHECK6-NEXT: store i32 [[TMP26]], i32* [[DOTLINEAR_START1_I]], align 4, !noalias !14 -// CHECK6-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP20]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP20]], i32 0, i32 0 // CHECK6-NEXT: [[TMP28:%.*]] = load i32*, i32** [[TMP27]], align 8 // CHECK6-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 // CHECK6-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP29]] to i32 diff --git a/clang/test/OpenMP/parallel_private_codegen.cpp b/clang/test/OpenMP/parallel_private_codegen.cpp --- a/clang/test/OpenMP/parallel_private_codegen.cpp +++ b/clang/test/OpenMP/parallel_private_codegen.cpp @@ -177,6 +177,7 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN2SSC1ERi(%struct.SS* noundef nonnull align 8 dereferenceable(16) [[SS]], i32* noundef nonnull align 4 dereferenceable(4) @_ZZ4mainE5sivar) // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) @@ -188,7 +189,7 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00) // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], float noundef 3.000000e+00) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK1-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4:[0-9]+]] @@ -244,10 +245,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 @@ -255,6 +257,8 @@ // CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] @@ -266,20 +270,20 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[T_VAR]], align 4 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP0]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP1:%.*]] = bitcast %struct.S* [[ARRAYIDX1]] to i8* -// CHECK1-NEXT: [[TMP2:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP2:%.*]] = bitcast %struct.S* [[ARRAYIDX1]] to i8* +// CHECK1-NEXT: [[TMP3:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP2]], i8* align 4 [[TMP3]], i64 4, i1 false) // CHECK1-NEXT: store i32 3, i32* [[SIVAR]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i64 2 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP3]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP4]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -302,36 +306,37 @@ // CHECK1-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[SST:%.*]] = alloca [[STRUCT_SST:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 128 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 128 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1]], align 128 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: call void @_ZN3SSTIiEC1Ev(%struct.SST* noundef nonnull align 4 dereferenceable(4) [[SST]]) // CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 128 // CHECK1-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP0]], i8* align 128 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK1-NEXT: ret i32 [[TMP2]] // @@ -341,6 +346,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32* [[D]], i32** [[D_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 @@ -354,16 +360,18 @@ // CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 2 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[D_ADDR]], align 8 // CHECK1-NEXT: store i32* [[TMP0]], i32** [[C]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.SS* [[THIS1]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[THIS1]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -371,21 +379,23 @@ // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32* [[A]], i32** [[TMP]], align 8 // CHECK1-NEXT: store i32* [[C]], i32** [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[B]], align 4 -// CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP3]], -1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4 +// CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP5]], -1 // CHECK1-NEXT: store i32 [[DEC]], i32* [[B]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP5]], 1 -// CHECK1-NEXT: store i32 [[DIV]], i32* [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 [[DIV]], i32* [[TMP6]], align 4 // CHECK1-NEXT: ret void // // @@ -424,12 +434,12 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK1-NEXT: ret void // // @@ -444,78 +454,81 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 128 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 128 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: -// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[T_VAR]], align 128 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[T_VAR]], align 128 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP0]], i32* [[ARRAYIDX]], align 128 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP1:%.*]] = bitcast %struct.S.0* [[ARRAYIDX1]] to i8* -// CHECK1-NEXT: [[TMP2:%.*]] = bitcast %struct.S.0* [[VAR]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP1]], i8* align 128 [[TMP2]], i64 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN2]], i64 2 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[ARRAYIDX]], align 128 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP2:%.*]] = bitcast %struct.S.1* [[ARRAYIDX1]] to i8* +// CHECK1-NEXT: [[TMP3:%.*]] = bitcast %struct.S.1* [[VAR]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP2]], i8* align 128 [[TMP3]], i64 4, i1 false) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN2]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP3]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP4]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done3: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, i32* [[F]], align 4 // CHECK1-NEXT: ret void // @@ -524,59 +537,64 @@ // CHECK1-SAME: (%struct.SST* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], %struct.SST* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, i32* [[A]], align 4 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SST*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.SST* [[THIS1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SST* [[THIS1]], %struct.SST** [[TMP0]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SST* noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: [[DOTA__VOID_ADDR:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP2]], i64 4, i8* inttoptr (i64 2 to i8*)) +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SST*, %struct.SST** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: [[DOTA__VOID_ADDR:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP4]], i64 4, i8* inttoptr (i64 2 to i8*)) // CHECK1-NEXT: [[DOTA__ADDR:%.*]] = bitcast i8* [[DOTA__VOID_ADDR]] to i32* // CHECK1-NEXT: store i32* [[DOTA__ADDR]], i32** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i32* [[DOTA__ADDR]] to i8* -// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP2]], i8* [[TMP5]], i8* inttoptr (i64 2 to i8*)) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i32* [[DOTA__ADDR]] to i8* +// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP4]], i8* [[TMP7]], i8* inttoptr (i64 2 to i8*)) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -610,6 +628,7 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK3-NEXT: store i32* [[D]], i32** [[D_ADDR]], align 8 // CHECK3-NEXT: [[THIS1:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 @@ -623,16 +642,18 @@ // CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 2 // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[D_ADDR]], align 8 // CHECK3-NEXT: store i32* [[TMP0]], i32** [[C]], align 8 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[THIS1]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[THIS1]], %struct.SS** [[TMP1]], align 8 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -641,20 +662,22 @@ // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK3-NEXT: store i32* [[A]], i32** [[TMP]], align 8 // CHECK3-NEXT: store i32* [[C]], i32** [[_TMP1]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK3-NEXT: store i32* [[TMP3]], i32** [[TMP2]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store i32* [[B]], i32** [[TMP4]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[_TMP1]], align 8 -// CHECK3-NEXT: store i32* [[TMP6]], i32** [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK3-NEXT: store i32* [[TMP5]], i32** [[TMP4]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[B]], i32** [[TMP6]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[_TMP1]], align 8 +// CHECK3-NEXT: store i32* [[TMP8]], i32** [[TMP7]], align 8 // CHECK3-NEXT: call void @_ZZN2SSC1ERiENKUlvE_clEv(%class.anon.0* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK3-NEXT: ret void // @@ -663,6 +686,7 @@ // CHECK3-SAME: (%class.anon.0* noundef nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR2:[0-9]+]] align 2 { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon.0*, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK3-NEXT: store %class.anon.0* [[THIS]], %class.anon.0** [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[THIS1:%.*]] = load %class.anon.0*, %class.anon.0** [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], %class.anon.0* [[THIS1]], i32 0, i32 0 @@ -682,16 +706,18 @@ // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[DIV]], i32* [[TMP9]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.SS* [[TMP1]]) +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP1]], %struct.SS** [[TMP11]], align 8 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -699,41 +725,46 @@ // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK3-NEXT: store i32* [[A]], i32** [[TMP]], align 8 // CHECK3-NEXT: store i32* [[C]], i32** [[_TMP1]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK3-NEXT: store i32 [[INC]], i32* [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[B]], align 4 -// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP3]], -1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK3-NEXT: store i32 [[INC]], i32* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4 +// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP5]], -1 // CHECK3-NEXT: store i32 [[DEC]], i32* [[B]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[_TMP1]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP5]], 1 -// CHECK3-NEXT: store i32 [[DIV]], i32* [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[_TMP1]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP7]], 1 +// CHECK3-NEXT: store i32 [[DIV]], i32* [[TMP6]], align 4 // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK3-NEXT: [[G:%.*]] = alloca i32, align 128 // CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_3:%.*]], align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 1, i32* [[G]], align 128 // CHECK3-NEXT: store i32 2, i32* [[SIVAR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store i32* [[G]], i32** [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[TMP1]], align 8 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[G]], i32** [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[TMP2]], align 8 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.3* noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: ret void // // @@ -768,23 +799,27 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>*, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* // CHECK4-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>** [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK4-NEXT: [[G:%.*]] = alloca i32, align 128 // CHECK4-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>, align 128 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK4-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK4-NEXT: store i32 1, i32* [[G]], align 128 // CHECK4-NEXT: store i32 20, i32* [[SIVAR]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>* [[BLOCK]], i32 0, i32 0 @@ -798,18 +833,18 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>* [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp.1 to %struct.__block_descriptor*), %struct.__block_descriptor** [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>* [[BLOCK]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP0:%.*]] = load volatile i32, i32* [[G]], align 128 -// CHECK4-NEXT: store volatile i32 [[TMP0]], i32* [[BLOCK_CAPTURED]], align 128 +// CHECK4-NEXT: [[TMP1:%.*]] = load volatile i32, i32* [[G]], align 128 +// CHECK4-NEXT: store volatile i32 [[TMP1]], i32* [[BLOCK_CAPTURED]], align 128 // CHECK4-NEXT: [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>* [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[SIVAR]], align 4 -// CHECK4-NEXT: store i32 [[TMP1]], i32* [[BLOCK_CAPTURED1]], align 32 -// CHECK4-NEXT: [[TMP2:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>* [[BLOCK]] to void ()* -// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP2]] to %struct.__block_literal_generic* -// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP4:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* -// CHECK4-NEXT: [[TMP5:%.*]] = load i8*, i8** [[TMP3]], align 8 -// CHECK4-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to void (i8*)* -// CHECK4-NEXT: call void [[TMP6]](i8* noundef [[TMP4]]) +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[BLOCK_CAPTURED1]], align 32 +// CHECK4-NEXT: [[TMP3:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>* [[BLOCK]] to void ()* +// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP3]] to %struct.__block_literal_generic* +// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP5:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* +// CHECK4-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP4]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to void (i8*)* +// CHECK4-NEXT: call void [[TMP7]](i8* noundef [[TMP5]]) // CHECK4-NEXT: ret void // // @@ -833,6 +868,7 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 // CHECK4-NEXT: [[D_ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK4-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK4-NEXT: store i32* [[D]], i32** [[D_ADDR]], align 8 // CHECK4-NEXT: [[THIS1:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 @@ -846,16 +882,18 @@ // CHECK4-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 2 // CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[D_ADDR]], align 8 // CHECK4-NEXT: store i32* [[TMP0]], i32** [[C]], align 8 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.SS* [[THIS1]]) +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store %struct.SS* [[THIS1]], %struct.SS** [[TMP1]], align 8 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -864,8 +902,10 @@ // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, align 8 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK4-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK4-NEXT: store i32* [[A]], i32** [[TMP]], align 8 // CHECK4-NEXT: store i32* [[C]], i32** [[_TMP1]], align 8 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 0 @@ -879,23 +919,23 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp.4 to %struct.__block_descriptor*), %struct.__block_descriptor** [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED_THIS_ADDR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[BLOCK_CAPTURED_THIS_ADDR]], align 8 +// CHECK4-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[BLOCK_CAPTURED_THIS_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK4-NEXT: store i32* [[TMP1]], i32** [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK4-NEXT: store i32* [[TMP3]], i32** [[BLOCK_CAPTURED]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED2:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[B]], align 4 -// CHECK4-NEXT: store i32 [[TMP2]], i32* [[BLOCK_CAPTURED2]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[B]], align 4 +// CHECK4-NEXT: store i32 [[TMP4]], i32* [[BLOCK_CAPTURED2]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED3:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32*, i32** [[_TMP1]], align 8 -// CHECK4-NEXT: store i32* [[TMP3]], i32** [[BLOCK_CAPTURED3]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]] to void ()* -// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP4]] to %struct.__block_literal_generic* -// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP6:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* -// CHECK4-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP5]], align 8 -// CHECK4-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to void (i8*)* -// CHECK4-NEXT: call void [[TMP8]](i8* noundef [[TMP6]]) +// CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[_TMP1]], align 8 +// CHECK4-NEXT: store i32* [[TMP5]], i32** [[BLOCK_CAPTURED3]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]] to void ()* +// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP6]] to %struct.__block_literal_generic* +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* +// CHECK4-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP7]], align 8 +// CHECK4-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to void (i8*)* +// CHECK4-NEXT: call void [[TMP10]](i8* noundef [[TMP8]]) // CHECK4-NEXT: ret void // // @@ -904,6 +944,7 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>*, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK4-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* // CHECK4-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>** [[BLOCK_ADDR]], align 8 @@ -923,16 +964,18 @@ // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP4]], 1 // CHECK4-NEXT: store i32 [[DIV]], i32* [[TMP3]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.SS* [[THIS]]) +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[TMP5]], align 8 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -940,20 +983,22 @@ // CHECK4-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK4-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK4-NEXT: store i32* [[A]], i32** [[TMP]], align 8 // CHECK4-NEXT: store i32* [[C]], i32** [[_TMP1]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK4-NEXT: store i32 [[INC]], i32* [[TMP1]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[B]], align 4 -// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP3]], -1 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK4-NEXT: store i32 [[INC]], i32* [[TMP3]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4 +// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP5]], -1 // CHECK4-NEXT: store i32 [[DEC]], i32* [[B]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32*, i32** [[_TMP1]], align 8 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP5]], 1 -// CHECK4-NEXT: store i32 [[DIV]], i32* [[TMP4]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32*, i32** [[_TMP1]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP7]], 1 +// CHECK4-NEXT: store i32 [[DIV]], i32* [[TMP6]], align 4 // CHECK4-NEXT: ret void // diff --git a/clang/test/OpenMP/parallel_reduction_codegen.cpp b/clang/test/OpenMP/parallel_reduction_codegen.cpp --- a/clang/test/OpenMP/parallel_reduction_codegen.cpp +++ b/clang/test/OpenMP/parallel_reduction_codegen.cpp @@ -332,18 +332,21 @@ // CHECK1-SAME: (i16* noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[X_ADDR:%.*]] = alloca i16*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i16* [[X]], i16** [[X_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16*, i16** [[X_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i16*)* @.omp_outlined. to void (i32*, i32*, ...)*), i16* [[TMP0]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16*, i16** [[X_ADDR]], align 8 +// CHECK1-NEXT: store i16* [[TMP1]], i16** [[TMP0]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef [[X:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[X_ADDR:%.*]] = alloca i16*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i16*, align 8 @@ -352,109 +355,111 @@ // CHECK1-NEXT: [[_TMP13:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i16* [[X]], i16** [[X_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16*, i16** [[X_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP0]], i64 0 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16*, i16** [[X_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[TMP1]], i64 0 -// CHECK1-NEXT: [[TMP2:%.*]] = ptrtoint i16* [[ARRAYIDX1]] to i64 -// CHECK1-NEXT: [[TMP3:%.*]] = ptrtoint i16* [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]] -// CHECK1-NEXT: [[TMP5:%.*]] = sdiv exact i64 [[TMP4]], ptrtoint (i16* getelementptr (i16, i16* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP6:%.*]] = add nuw i64 [[TMP5]], 1 -// CHECK1-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], ptrtoint (i16* getelementptr (i16, i16* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP8:%.*]] = call i8* @llvm.stacksave() -// CHECK1-NEXT: store i8* [[TMP8]], i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP6]], align 16 -// CHECK1-NEXT: store i64 [[TMP6]], i64* [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr i16, i16* [[VLA]], i64 [[TMP6]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[VLA]], [[TMP9]] +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16*, i16** [[TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP2]], i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 0 +// CHECK1-NEXT: [[TMP4:%.*]] = ptrtoint i16* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP5:%.*]] = ptrtoint i16* [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: [[TMP7:%.*]] = sdiv exact i64 [[TMP6]], ptrtoint (i16* getelementptr (i16, i16* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP8:%.*]] = add nuw i64 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], ptrtoint (i16* getelementptr (i16, i16* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave() +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP8]], align 16 +// CHECK1-NEXT: store i64 [[TMP8]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr i16, i16* [[VLA]], i64 [[TMP8]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[VLA]], [[TMP11]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i16 0, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP10:%.*]] = load i16*, i16** [[X_ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint i16* [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = ptrtoint i16* [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP13:%.*]] = sub i64 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: [[TMP14:%.*]] = sdiv exact i64 [[TMP13]], ptrtoint (i16* getelementptr (i16, i16* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr i16, i16* [[VLA]], i64 [[TMP14]] -// CHECK1-NEXT: store i16* [[TMP15]], i16** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i16* [[VLA]] to i8* -// CHECK1-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP6]] to i8* +// CHECK1-NEXT: [[TMP12:%.*]] = load i16*, i16** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = ptrtoint i16* [[TMP12]] to i64 +// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint i16* [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = sub i64 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP16:%.*]] = sdiv exact i64 [[TMP15]], ptrtoint (i16* getelementptr (i16, i16* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i16, i16* [[VLA]], i64 [[TMP16]] +// CHECK1-NEXT: store i16* [[TMP17]], i16** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP19:%.*]] = bitcast i16* [[VLA]] to i8* // CHECK1-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP21]], i32 1, i64 16, i8* [[TMP22]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP8]] to i8* +// CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP23]], i32 1, i64 16, i8* [[TMP24]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr i16, i16* [[ARRAYIDX]], i64 [[TMP6]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i16* [[ARRAYIDX]], [[TMP24]] +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr i16, i16* [[ARRAYIDX]], i64 [[TMP8]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i16* [[ARRAYIDX]], [[TMP26]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i16* [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST2:%.*]] = phi i16* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT5:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP25:%.*]] = load i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST2]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP25]] to i32 -// CHECK1-NEXT: [[TMP26:%.*]] = load i16, i16* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP26]] to i32 +// CHECK1-NEXT: [[TMP27:%.*]] = load i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST2]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP27]] to i32 +// CHECK1-NEXT: [[TMP28:%.*]] = load i16, i16* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 2 +// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP28]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV3]] // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD]] to i16 // CHECK1-NEXT: store i16 [[CONV4]], i16* [[OMP_ARRAYCPY_DESTELEMENTPAST2]], align 2 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT5]] = getelementptr i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST2]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i16, i16* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE6:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP24]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE6:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP26]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done7: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr i16, i16* [[ARRAYIDX]], i64 [[TMP6]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY8:%.*]] = icmp eq i16* [[ARRAYIDX]], [[TMP27]] +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr i16, i16* [[ARRAYIDX]], i64 [[TMP8]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY8:%.*]] = icmp eq i16* [[ARRAYIDX]], [[TMP29]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY8]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY9:%.*]] // CHECK1: omp.arraycpy.body9: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST10:%.*]] = phi i16* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT19:%.*]], [[ATOMIC_EXIT:%.*]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST11:%.*]] = phi i16* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT18:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP28:%.*]] = load i16, i16* [[OMP_ARRAYCPY_SRCELEMENTPAST10]], align 2 -// CHECK1-NEXT: [[CONV12:%.*]] = sext i16 [[TMP28]] to i32 +// CHECK1-NEXT: [[TMP30:%.*]] = load i16, i16* [[OMP_ARRAYCPY_SRCELEMENTPAST10]], align 2 +// CHECK1-NEXT: [[CONV12:%.*]] = sext i16 [[TMP30]] to i32 // CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST11]] monotonic, align 2 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP29:%.*]] = phi i16 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY9]] ], [ [[TMP34:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i16 [[TMP29]], i16* [[_TMP13]], align 2 -// CHECK1-NEXT: [[TMP30:%.*]] = load i16, i16* [[_TMP13]], align 2 -// CHECK1-NEXT: [[CONV14:%.*]] = sext i16 [[TMP30]] to i32 -// CHECK1-NEXT: [[TMP31:%.*]] = load i16, i16* [[OMP_ARRAYCPY_SRCELEMENTPAST10]], align 2 -// CHECK1-NEXT: [[CONV15:%.*]] = sext i16 [[TMP31]] to i32 +// CHECK1-NEXT: [[TMP31:%.*]] = phi i16 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY9]] ], [ [[TMP36:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i16 [[TMP31]], i16* [[_TMP13]], align 2 +// CHECK1-NEXT: [[TMP32:%.*]] = load i16, i16* [[_TMP13]], align 2 +// CHECK1-NEXT: [[CONV14:%.*]] = sext i16 [[TMP32]] to i32 +// CHECK1-NEXT: [[TMP33:%.*]] = load i16, i16* [[OMP_ARRAYCPY_SRCELEMENTPAST10]], align 2 +// CHECK1-NEXT: [[CONV15:%.*]] = sext i16 [[TMP33]] to i32 // CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV14]], [[CONV15]] // CHECK1-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i16 // CHECK1-NEXT: store i16 [[CONV17]], i16* [[ATOMIC_TEMP]], align 2 -// CHECK1-NEXT: [[TMP32:%.*]] = load i16, i16* [[ATOMIC_TEMP]], align 2 -// CHECK1-NEXT: [[TMP33:%.*]] = cmpxchg i16* [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i16 [[TMP29]], i16 [[TMP32]] monotonic monotonic, align 2 -// CHECK1-NEXT: [[TMP34]] = extractvalue { i16, i1 } [[TMP33]], 0 -// CHECK1-NEXT: [[TMP35:%.*]] = extractvalue { i16, i1 } [[TMP33]], 1 -// CHECK1-NEXT: br i1 [[TMP35]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i16, i16* [[ATOMIC_TEMP]], align 2 +// CHECK1-NEXT: [[TMP35:%.*]] = cmpxchg i16* [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i16 [[TMP31]], i16 [[TMP34]] monotonic monotonic, align 2 +// CHECK1-NEXT: [[TMP36]] = extractvalue { i16, i1 } [[TMP35]], 0 +// CHECK1-NEXT: [[TMP37:%.*]] = extractvalue { i16, i1 } [[TMP35]], 1 +// CHECK1-NEXT: br i1 [[TMP37]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT18]] = getelementptr i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT19]] = getelementptr i16, i16* [[OMP_ARRAYCPY_SRCELEMENTPAST10]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT18]], [[TMP27]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT18]], [[TMP29]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY9]] // CHECK1: omp.arraycpy.done21: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP36:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP36]]) +// CHECK1-NEXT: [[TMP38:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP38]]) // CHECK1-NEXT: ret void // // @@ -512,6 +517,9 @@ // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 4 // CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S]], align 4 // CHECK1-NEXT: [[CF:%.*]] = alloca { float, float }, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN2SSC1ERi(%struct.SS* noundef nonnull align 8 dereferenceable(16) [[SS]], i32* noundef nonnull align 4 dereferenceable(4) @sivar) // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) @@ -524,32 +532,58 @@ // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00) // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], float noundef 3.000000e+00) // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, float*, [2 x %struct.S]*, %struct.S*, %struct.S*, float*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [2 x i32]* [[VEC]], float* [[T_VAR]], [2 x %struct.S]* [[S_ARR]], %struct.S* [[VAR]], %struct.S* [[VAR1]], float* [[T_VAR1]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store float* [[T_VAR]], float** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store %struct.S* [[VAR1]], %struct.S** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store float* [[T_VAR1]], float** [[TMP6]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) // CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[CALL]], 0.000000e+00 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] // CHECK1: if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, float*, [2 x %struct.S]*, %struct.S*, %struct.S*, float*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), [2 x i32]* [[VEC]], float* [[T_VAR]], [2 x %struct.S]* [[S_ARR]], %struct.S* [[VAR]], %struct.S* [[VAR1]], float* [[T_VAR1]]) +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 1 +// CHECK1-NEXT: store float* [[T_VAR]], float** [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 3 +// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 4 +// CHECK1-NEXT: store %struct.S* [[VAR1]], %struct.S** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 5 +// CHECK1-NEXT: store float* [[T_VAR1]], float** [[TMP12]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_1]]) // CHECK1-NEXT: br label [[IF_END]] // CHECK1: if.end: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { float, float }*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), { float, float }* [[CF]]) -// CHECK1-NEXT: [[CALL1:%.*]] = call noundef i32 @_Z5tmainIiET_v() -// CHECK1-NEXT: store i32 [[CALL1]], i32* [[RETVAL]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 0 +// CHECK1-NEXT: store { float, float }* [[CF]], { float, float }** [[TMP13]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_2]]) +// CHECK1-NEXT: [[CALL3:%.*]] = call noundef i32 @_Z5tmainIiET_v() +// CHECK1-NEXT: store i32 [[CALL3]], i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR5:[0-9]+]] // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP1]], [[IF_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP14]], [[IF_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done2: +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done4: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP2]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP15]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2SSC1ERi @@ -589,169 +623,166 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR1:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[T_VAR1:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca float*, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK1-NEXT: [[VAR1_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK1-NEXT: [[T_VAR1_ADDR:%.*]] = alloca float*, align 8 -// CHECK1-NEXT: [[T_VAR2:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[VAR3:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[VAR14:%.*]] = alloca [[STRUCT_S]], align 4 -// CHECK1-NEXT: [[T_VAR15:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x i8*], align 8 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 4 -// CHECK1-NEXT: [[REF_TMP12:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[REF_TMP7:%.*]] = alloca [[STRUCT_S]], align 4 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca float, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store float* [[T_VAR]], float** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S* [[VAR1]], %struct.S** [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: store float* [[T_VAR1]], float** [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load float*, float** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load %struct.S*, %struct.S** [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load float*, float** [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: store float 0.000000e+00, float* [[T_VAR2]], align 4 -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: store float 0x47EFFFFFE0000000, float* [[T_VAR15]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[T_VAR2]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = fptosi float [[TMP6]] to i32 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP0]], i64 0, i64 0 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load float*, float** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.S*, %struct.S** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP11]], align 8 +// CHECK1-NEXT: store float 0.000000e+00, float* [[T_VAR]], align 4 +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: store float 0x47EFFFFFE0000000, float* [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load float, float* [[T_VAR]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = fptosi float [[TMP13]] to i32 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: store i32 [[CONV]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast %struct.S* [[ARRAYIDX6]] to i8* -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast %struct.S* [[VAR3]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP7]], i8* align 4 [[TMP8]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast float* [[T_VAR2]] to i8* -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[VAR3]] to i8* -// CHECK1-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[VAR14]] to i8* -// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast float* [[T_VAR15]] to i8* -// CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = bitcast [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]], i32 4, i64 32, i8* [[TMP19]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP6]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[ARRAYIDX1]] to i8* +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast float* [[T_VAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR1]] to i8* +// CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast float* [[T_VAR1]] to i8* +// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]], i32 4, i64 32, i8* [[TMP26]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP21:%.*]] = load float, float* [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load float, float* [[T_VAR2]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP21]], [[TMP22]] -// CHECK1-NEXT: store float [[ADD]], float* [[TMP1]], align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP3]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast %struct.S* [[TMP3]] to i8* -// CHECK1-NEXT: [[TMP24:%.*]] = bitcast %struct.S* [[CALL]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP23]], i8* align 4 [[TMP24]], i64 4, i1 false) -// CHECK1-NEXT: [[CALL7:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP4]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[CALL7]], 0.000000e+00 +// CHECK1-NEXT: [[TMP28:%.*]] = load float, float* [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load float, float* [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP28]], [[TMP29]] +// CHECK1-NEXT: store float [[ADD]], float* [[TMP4]], align 4 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP8]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: [[TMP30:%.*]] = bitcast %struct.S* [[TMP8]] to i8* +// CHECK1-NEXT: [[TMP31:%.*]] = bitcast %struct.S* [[CALL]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i64 4, i1 false) +// CHECK1-NEXT: [[CALL2:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP10]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[CALL2]], 0.000000e+00 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] // CHECK1: land.rhs: -// CHECK1-NEXT: [[CALL8:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: [[TOBOOL9:%.*]] = fcmp une float [[CALL8]], 0.000000e+00 +// CHECK1-NEXT: [[CALL3:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL4:%.*]] = fcmp une float [[CALL3]], 0.000000e+00 // CHECK1-NEXT: br label [[LAND_END]] // CHECK1: land.end: -// CHECK1-NEXT: [[TMP25:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL9]], [[LAND_RHS]] ] -// CHECK1-NEXT: [[CONV10:%.*]] = uitofp i1 [[TMP25]] to float -// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], float noundef [[CONV10]]) -// CHECK1-NEXT: [[TMP26:%.*]] = bitcast %struct.S* [[TMP4]] to i8* -// CHECK1-NEXT: [[TMP27:%.*]] = bitcast %struct.S* [[REF_TMP]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP32:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL4]], [[LAND_RHS]] ] +// CHECK1-NEXT: [[CONV5:%.*]] = uitofp i1 [[TMP32]] to float +// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], float noundef [[CONV5]]) +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[TMP10]] to i8* +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[REF_TMP]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i64 4, i1 false) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP28:%.*]] = load float, float* [[TMP5]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load float, float* [[T_VAR15]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = fcmp olt float [[TMP28]], [[TMP29]] +// CHECK1-NEXT: [[TMP35:%.*]] = load float, float* [[TMP12]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load float, float* [[T_VAR1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = fcmp olt float [[TMP35]], [[TMP36]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP30:%.*]] = load float, float* [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load float, float* [[TMP12]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP31:%.*]] = load float, float* [[T_VAR15]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load float, float* [[T_VAR1]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi float [ [[TMP30]], [[COND_TRUE]] ], [ [[TMP31]], [[COND_FALSE]] ] -// CHECK1-NEXT: store float [[COND]], float* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[COND:%.*]] = phi float [ [[TMP37]], [[COND_TRUE]] ], [ [[TMP38]], [[COND_FALSE]] ] +// CHECK1-NEXT: store float [[COND]], float* [[TMP12]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP32:%.*]] = load float, float* [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = atomicrmw fadd float* [[TMP1]], float [[TMP32]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL11:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP3]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[TMP3]] to i8* -// CHECK1-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[CALL11]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL13:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP4]]) -// CHECK1-NEXT: [[TOBOOL14:%.*]] = fcmp une float [[CALL13]], 0.000000e+00 -// CHECK1-NEXT: br i1 [[TOBOOL14]], label [[LAND_RHS15:%.*]], label [[LAND_END18:%.*]] -// CHECK1: land.rhs15: -// CHECK1-NEXT: [[CALL16:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: [[TOBOOL17:%.*]] = fcmp une float [[CALL16]], 0.000000e+00 -// CHECK1-NEXT: br label [[LAND_END18]] -// CHECK1: land.end18: -// CHECK1-NEXT: [[TMP36:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL17]], [[LAND_RHS15]] ] -// CHECK1-NEXT: [[CONV19:%.*]] = uitofp i1 [[TMP36]] to float -// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP12]], float noundef [[CONV19]]) -// CHECK1-NEXT: [[TMP37:%.*]] = bitcast %struct.S* [[TMP4]] to i8* -// CHECK1-NEXT: [[TMP38:%.*]] = bitcast %struct.S* [[REF_TMP12]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP37]], i8* align 4 [[TMP38]], i64 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP12]]) #[[ATTR5]] -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[TMP39:%.*]] = load float, float* [[T_VAR15]], align 4 -// CHECK1-NEXT: [[TMP40:%.*]] = bitcast float* [[TMP5]] to i32* -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP40]] monotonic, align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load float, float* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = atomicrmw fadd float* [[TMP4]], float [[TMP39]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL6:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP8]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: [[TMP41:%.*]] = bitcast %struct.S* [[TMP8]] to i8* +// CHECK1-NEXT: [[TMP42:%.*]] = bitcast %struct.S* [[CALL6]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP41]], i8* align 4 [[TMP42]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL8:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP10]]) +// CHECK1-NEXT: [[TOBOOL9:%.*]] = fcmp une float [[CALL8]], 0.000000e+00 +// CHECK1-NEXT: br i1 [[TOBOOL9]], label [[LAND_RHS10:%.*]], label [[LAND_END13:%.*]] +// CHECK1: land.rhs10: +// CHECK1-NEXT: [[CALL11:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL12:%.*]] = fcmp une float [[CALL11]], 0.000000e+00 +// CHECK1-NEXT: br label [[LAND_END13]] +// CHECK1: land.end13: +// CHECK1-NEXT: [[TMP43:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL12]], [[LAND_RHS10]] ] +// CHECK1-NEXT: [[CONV14:%.*]] = uitofp i1 [[TMP43]] to float +// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP7]], float noundef [[CONV14]]) +// CHECK1-NEXT: [[TMP44:%.*]] = bitcast %struct.S* [[TMP10]] to i8* +// CHECK1-NEXT: [[TMP45:%.*]] = bitcast %struct.S* [[REF_TMP7]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP44]], i8* align 4 [[TMP45]], i64 4, i1 false) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP7]]) #[[ATTR5]] +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP46:%.*]] = load float, float* [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = bitcast float* [[TMP12]] to i32* +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP47]] monotonic, align 4 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP41:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END18]] ], [ [[TMP51:%.*]], [[COND_END23:%.*]] ] -// CHECK1-NEXT: [[TMP42:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32* -// CHECK1-NEXT: [[TMP43:%.*]] = bitcast i32 [[TMP41]] to float -// CHECK1-NEXT: store float [[TMP43]], float* [[TMP]], align 4 -// CHECK1-NEXT: [[TMP44:%.*]] = load float, float* [[TMP]], align 4 -// CHECK1-NEXT: [[TMP45:%.*]] = load float, float* [[T_VAR15]], align 4 -// CHECK1-NEXT: [[CMP20:%.*]] = fcmp olt float [[TMP44]], [[TMP45]] -// CHECK1-NEXT: br i1 [[CMP20]], label [[COND_TRUE21:%.*]], label [[COND_FALSE22:%.*]] -// CHECK1: cond.true21: -// CHECK1-NEXT: [[TMP46:%.*]] = load float, float* [[TMP]], align 4 -// CHECK1-NEXT: br label [[COND_END23]] -// CHECK1: cond.false22: -// CHECK1-NEXT: [[TMP47:%.*]] = load float, float* [[T_VAR15]], align 4 -// CHECK1-NEXT: br label [[COND_END23]] -// CHECK1: cond.end23: -// CHECK1-NEXT: [[COND24:%.*]] = phi float [ [[TMP46]], [[COND_TRUE21]] ], [ [[TMP47]], [[COND_FALSE22]] ] -// CHECK1-NEXT: store float [[COND24]], float* [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[TMP42]], align 4 -// CHECK1-NEXT: [[TMP49:%.*]] = bitcast float* [[TMP5]] to i32* -// CHECK1-NEXT: [[TMP50:%.*]] = cmpxchg i32* [[TMP49]], i32 [[TMP41]], i32 [[TMP48]] monotonic monotonic, align 4 -// CHECK1-NEXT: [[TMP51]] = extractvalue { i32, i1 } [[TMP50]], 0 -// CHECK1-NEXT: [[TMP52:%.*]] = extractvalue { i32, i1 } [[TMP50]], 1 -// CHECK1-NEXT: br i1 [[TMP52]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP48:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END13]] ], [ [[TMP58:%.*]], [[COND_END18:%.*]] ] +// CHECK1-NEXT: [[TMP49:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32* +// CHECK1-NEXT: [[TMP50:%.*]] = bitcast i32 [[TMP48]] to float +// CHECK1-NEXT: store float [[TMP50]], float* [[TMP]], align 4 +// CHECK1-NEXT: [[TMP51:%.*]] = load float, float* [[TMP]], align 4 +// CHECK1-NEXT: [[TMP52:%.*]] = load float, float* [[T_VAR1]], align 4 +// CHECK1-NEXT: [[CMP15:%.*]] = fcmp olt float [[TMP51]], [[TMP52]] +// CHECK1-NEXT: br i1 [[CMP15]], label [[COND_TRUE16:%.*]], label [[COND_FALSE17:%.*]] +// CHECK1: cond.true16: +// CHECK1-NEXT: [[TMP53:%.*]] = load float, float* [[TMP]], align 4 +// CHECK1-NEXT: br label [[COND_END18]] +// CHECK1: cond.false17: +// CHECK1-NEXT: [[TMP54:%.*]] = load float, float* [[T_VAR1]], align 4 +// CHECK1-NEXT: br label [[COND_END18]] +// CHECK1: cond.end18: +// CHECK1-NEXT: [[COND19:%.*]] = phi float [ [[TMP53]], [[COND_TRUE16]] ], [ [[TMP54]], [[COND_FALSE17]] ] +// CHECK1-NEXT: store float [[COND19]], float* [[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, i32* [[TMP49]], align 4 +// CHECK1-NEXT: [[TMP56:%.*]] = bitcast float* [[TMP12]] to i32* +// CHECK1-NEXT: [[TMP57:%.*]] = cmpxchg i32* [[TMP56]], i32 [[TMP48]], i32 [[TMP55]] monotonic monotonic, align 4 +// CHECK1-NEXT: [[TMP58]] = extractvalue { i32, i1 } [[TMP57]], 0 +// CHECK1-NEXT: [[TMP59:%.*]] = extractvalue { i32, i1 } [[TMP57]], 1 +// CHECK1-NEXT: br i1 [[TMP59]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]]) #[[ATTR5]] -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR3]]) #[[ATTR5]] +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR5]] +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] // CHECK1-NEXT: ret void // // @@ -861,108 +892,107 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR1:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[T_VAR1:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca float*, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK1-NEXT: [[VAR1_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK1-NEXT: [[T_VAR1_ADDR:%.*]] = alloca float*, align 8 -// CHECK1-NEXT: [[T_VAR2:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[VAR3:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[VAR14:%.*]] = alloca [[STRUCT_S]], align 4 -// CHECK1-NEXT: [[T_VAR15:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca float, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store float* [[T_VAR]], float** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S* [[VAR1]], %struct.S** [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: store float* [[T_VAR1]], float** [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load float*, float** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load %struct.S*, %struct.S** [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load float*, float** [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: store float 0.000000e+00, float* [[T_VAR2]], align 4 -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: store float 0x47EFFFFFE0000000, float* [[T_VAR15]], align 4 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load float*, float** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.S*, %struct.S** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP11]], align 8 +// CHECK1-NEXT: store float 0.000000e+00, float* [[T_VAR]], align 4 +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: store float 0x47EFFFFFE0000000, float* [[T_VAR1]], align 4 // CHECK1-NEXT: br label [[WHILE_COND:%.*]] // CHECK1: while.cond: // CHECK1-NEXT: br label [[WHILE_BODY:%.*]] // CHECK1: while.body: -// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[T_VAR2]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = fptosi float [[TMP6]] to i32 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP0]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load float, float* [[T_VAR]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = fptosi float [[TMP13]] to i32 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: store i32 [[CONV]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast %struct.S* [[ARRAYIDX6]] to i8* -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast %struct.S* [[VAR3]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP7]], i8* align 4 [[TMP8]], i64 4, i1 false) +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP6]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[ARRAYIDX1]] to i8* +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i64 4, i1 false) // CHECK1-NEXT: br label [[WHILE_COND]], !llvm.loop [[LOOP5:![0-9]+]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], { float, float }* noundef nonnull align 4 dereferenceable(8) [[CF:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[CF_ADDR:%.*]] = alloca { float, float }*, align 8 -// CHECK1-NEXT: [[CF1:%.*]] = alloca { float, float }, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK1-NEXT: [[CF:%.*]] = alloca { float, float }, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca { float, float }, align 4 -// CHECK1-NEXT: [[ATOMIC_TEMP10:%.*]] = alloca { float, float }, align 4 +// CHECK1-NEXT: [[ATOMIC_TEMP9:%.*]] = alloca { float, float }, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca { float, float }, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store { float, float }* [[CF]], { float, float }** [[CF_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load { float, float }*, { float, float }** [[CF_ADDR]], align 8 -// CHECK1-NEXT: [[CF1_REALP:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[CF1]], i32 0, i32 0 -// CHECK1-NEXT: [[CF1_IMAGP:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[CF1]], i32 0, i32 1 -// CHECK1-NEXT: store float 0.000000e+00, float* [[CF1_REALP]], align 4 -// CHECK1-NEXT: store float 0.000000e+00, float* [[CF1_IMAGP]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP2:%.*]] = bitcast { float, float }* [[CF1]] to i8* -// CHECK1-NEXT: store i8* [[TMP2]], i8** [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP5]], void (i8*, i8*)* @.omp.reduction.reduction_func.5, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load { float, float }*, { float, float }** [[TMP1]], align 8 +// CHECK1-NEXT: [[CF_REALP:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[CF]], i32 0, i32 0 +// CHECK1-NEXT: [[CF_IMAGP:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[CF]], i32 0, i32 1 +// CHECK1-NEXT: store float 0.000000e+00, float* [[CF_REALP]], align 4 +// CHECK1-NEXT: store float 0.000000e+00, float* [[CF_IMAGP]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast { float, float }* [[CF]] to i8* +// CHECK1-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 1, i64 8, i8* [[TMP7]], void (i8*, i8*)* @.omp.reduction.reduction_func.5, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[DOTREALP:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[DOTREALP:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: [[DOTREAL:%.*]] = load float, float* [[DOTREALP]], align 4 -// CHECK1-NEXT: [[DOTIMAGP:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[DOTIMAGP:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[TMP2]], i32 0, i32 1 // CHECK1-NEXT: [[DOTIMAG:%.*]] = load float, float* [[DOTIMAGP]], align 4 -// CHECK1-NEXT: [[CF1_REALP2:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[CF1]], i32 0, i32 0 -// CHECK1-NEXT: [[CF1_REAL:%.*]] = load float, float* [[CF1_REALP2]], align 4 -// CHECK1-NEXT: [[CF1_IMAGP3:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[CF1]], i32 0, i32 1 -// CHECK1-NEXT: [[CF1_IMAG:%.*]] = load float, float* [[CF1_IMAGP3]], align 4 -// CHECK1-NEXT: [[ADD_R:%.*]] = fadd float [[DOTREAL]], [[CF1_REAL]] -// CHECK1-NEXT: [[ADD_I:%.*]] = fadd float [[DOTIMAG]], [[CF1_IMAG]] -// CHECK1-NEXT: [[DOTREALP4:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[DOTIMAGP5:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[TMP0]], i32 0, i32 1 -// CHECK1-NEXT: store float [[ADD_R]], float* [[DOTREALP4]], align 4 -// CHECK1-NEXT: store float [[ADD_I]], float* [[DOTIMAGP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[CF_REALP1:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[CF]], i32 0, i32 0 +// CHECK1-NEXT: [[CF_REAL:%.*]] = load float, float* [[CF_REALP1]], align 4 +// CHECK1-NEXT: [[CF_IMAGP2:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[CF]], i32 0, i32 1 +// CHECK1-NEXT: [[CF_IMAG:%.*]] = load float, float* [[CF_IMAGP2]], align 4 +// CHECK1-NEXT: [[ADD_R:%.*]] = fadd float [[DOTREAL]], [[CF_REAL]] +// CHECK1-NEXT: [[ADD_I:%.*]] = fadd float [[DOTIMAG]], [[CF_IMAG]] +// CHECK1-NEXT: [[DOTREALP3:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[DOTIMAGP4:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[TMP2]], i32 0, i32 1 +// CHECK1-NEXT: store float [[ADD_R]], float* [[DOTREALP3]], align 4 +// CHECK1-NEXT: store float [[ADD_I]], float* [[DOTIMAGP4]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[CF1_REALP6:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[CF1]], i32 0, i32 0 -// CHECK1-NEXT: [[CF1_REAL7:%.*]] = load float, float* [[CF1_REALP6]], align 4 -// CHECK1-NEXT: [[CF1_IMAGP8:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[CF1]], i32 0, i32 1 -// CHECK1-NEXT: [[CF1_IMAG9:%.*]] = load float, float* [[CF1_IMAGP8]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast { float, float }* [[TMP0]] to i8* -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast { float, float }* [[ATOMIC_TEMP]] to i8* -// CHECK1-NEXT: call void @__atomic_load(i64 noundef 8, i8* noundef [[TMP7]], i8* noundef [[TMP8]], i32 noundef 0) +// CHECK1-NEXT: [[CF_REALP5:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[CF]], i32 0, i32 0 +// CHECK1-NEXT: [[CF_REAL6:%.*]] = load float, float* [[CF_REALP5]], align 4 +// CHECK1-NEXT: [[CF_IMAGP7:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[CF]], i32 0, i32 1 +// CHECK1-NEXT: [[CF_IMAG8:%.*]] = load float, float* [[CF_IMAGP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast { float, float }* [[TMP2]] to i8* +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast { float, float }* [[ATOMIC_TEMP]] to i8* +// CHECK1-NEXT: call void @__atomic_load(i64 noundef 8, i8* noundef [[TMP9]], i8* noundef [[TMP10]], i32 noundef 0) // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: // CHECK1-NEXT: [[ATOMIC_TEMP_REALP:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[ATOMIC_TEMP]], i32 0, i32 0 @@ -973,24 +1003,24 @@ // CHECK1-NEXT: [[TMP_IMAGP:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[TMP]], i32 0, i32 1 // CHECK1-NEXT: store float [[ATOMIC_TEMP_REAL]], float* [[TMP_REALP]], align 4 // CHECK1-NEXT: store float [[ATOMIC_TEMP_IMAG]], float* [[TMP_IMAGP]], align 4 -// CHECK1-NEXT: [[TMP_REALP11:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[TMP]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP_REAL:%.*]] = load float, float* [[TMP_REALP11]], align 4 -// CHECK1-NEXT: [[TMP_IMAGP12:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP_IMAG:%.*]] = load float, float* [[TMP_IMAGP12]], align 4 -// CHECK1-NEXT: [[CF1_REALP13:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[CF1]], i32 0, i32 0 -// CHECK1-NEXT: [[CF1_REAL14:%.*]] = load float, float* [[CF1_REALP13]], align 4 -// CHECK1-NEXT: [[CF1_IMAGP15:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[CF1]], i32 0, i32 1 -// CHECK1-NEXT: [[CF1_IMAG16:%.*]] = load float, float* [[CF1_IMAGP15]], align 4 -// CHECK1-NEXT: [[ADD_R17:%.*]] = fadd float [[TMP_REAL]], [[CF1_REAL14]] -// CHECK1-NEXT: [[ADD_I18:%.*]] = fadd float [[TMP_IMAG]], [[CF1_IMAG16]] -// CHECK1-NEXT: [[ATOMIC_TEMP10_REALP:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[ATOMIC_TEMP10]], i32 0, i32 0 -// CHECK1-NEXT: [[ATOMIC_TEMP10_IMAGP:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[ATOMIC_TEMP10]], i32 0, i32 1 -// CHECK1-NEXT: store float [[ADD_R17]], float* [[ATOMIC_TEMP10_REALP]], align 4 -// CHECK1-NEXT: store float [[ADD_I18]], float* [[ATOMIC_TEMP10_IMAGP]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast { float, float }* [[TMP0]] to i8* -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast { float, float }* [[ATOMIC_TEMP]] to i8* -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast { float, float }* [[ATOMIC_TEMP10]] to i8* -// CHECK1-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 8, i8* noundef [[TMP9]], i8* noundef [[TMP10]], i8* noundef [[TMP11]], i32 noundef 0, i32 noundef 0) +// CHECK1-NEXT: [[TMP_REALP10:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[TMP]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP_REAL:%.*]] = load float, float* [[TMP_REALP10]], align 4 +// CHECK1-NEXT: [[TMP_IMAGP11:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP_IMAG:%.*]] = load float, float* [[TMP_IMAGP11]], align 4 +// CHECK1-NEXT: [[CF_REALP12:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[CF]], i32 0, i32 0 +// CHECK1-NEXT: [[CF_REAL13:%.*]] = load float, float* [[CF_REALP12]], align 4 +// CHECK1-NEXT: [[CF_IMAGP14:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[CF]], i32 0, i32 1 +// CHECK1-NEXT: [[CF_IMAG15:%.*]] = load float, float* [[CF_IMAGP14]], align 4 +// CHECK1-NEXT: [[ADD_R16:%.*]] = fadd float [[TMP_REAL]], [[CF_REAL13]] +// CHECK1-NEXT: [[ADD_I17:%.*]] = fadd float [[TMP_IMAG]], [[CF_IMAG15]] +// CHECK1-NEXT: [[ATOMIC_TEMP9_REALP:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[ATOMIC_TEMP9]], i32 0, i32 0 +// CHECK1-NEXT: [[ATOMIC_TEMP9_IMAGP:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[ATOMIC_TEMP9]], i32 0, i32 1 +// CHECK1-NEXT: store float [[ADD_R16]], float* [[ATOMIC_TEMP9_REALP]], align 4 +// CHECK1-NEXT: store float [[ADD_I17]], float* [[ATOMIC_TEMP9_IMAGP]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast { float, float }* [[TMP2]] to i8* +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast { float, float }* [[ATOMIC_TEMP]] to i8* +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast { float, float }* [[ATOMIC_TEMP9]] to i8* +// CHECK1-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 8, i8* noundef [[TMP11]], i8* noundef [[TMP12]], i8* noundef [[TMP13]], i32 noundef 0, i32 noundef 0) // CHECK1-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] @@ -1037,42 +1067,55 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[T:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_4:%.*]], align 4 // CHECK1-NEXT: [[SST:%.*]] = alloca [[STRUCT_SST:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 128 -// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S_0]], align 128 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.4], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_4]], align 128 +// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S_4]], align 128 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: call void @_ZN3SSTIiEC1Ev(%struct.SST* noundef nonnull align 4 dereferenceable(4) [[SST]]) // CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 128 // CHECK1-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32*, [2 x %struct.S.0]*, %struct.S.0*, %struct.S.0*, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), [2 x i32]* [[VEC]], i32* [[T_VAR]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[VAR]], %struct.S.0* [[VAR1]], i32* [[T_VAR1]]) +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.4], [2 x %struct.S.4]* [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_4]], %struct.S.4* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x %struct.S.4]* [[S_ARR]], [2 x %struct.S.4]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store %struct.S.4* [[VAR]], %struct.S.4** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store %struct.S.4* [[VAR1]], %struct.S.4** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store i32* [[T_VAR1]], i32** [[TMP6]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR5]] -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR5]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.4], [2 x %struct.S.4]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_S_4]], %struct.S.4* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.4* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_4]], %struct.S.4* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.4* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP2]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP8]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2SSC2ERi @@ -1083,6 +1126,7 @@ // CHECK1-NEXT: [[A2:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[C5:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32* [[D]], i32** [[D_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 @@ -1101,14 +1145,22 @@ // CHECK1-NEXT: [[C6:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 2 // CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[C6]], align 8 // CHECK1-NEXT: store i32* [[TMP1]], i32** [[C5]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[C5]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.SS* [[THIS1]], i32* [[TMP2]], i32* [[B4]], i32* [[TMP3]]) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[B4]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[THIS1]], %struct.SS** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A2]], align 8 +// CHECK1-NEXT: store i32* [[TMP4]], i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[B4]], i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[C5]], align 8 +// CHECK1-NEXT: store i32* [[TMP7]], i32** [[TMP6]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[B4]], align 4 // CHECK1-NEXT: [[B7:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 +// CHECK1-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i8 // CHECK1-NEXT: [[BF_LOAD8:%.*]] = load i8, i8* [[B7]], align 4 -// CHECK1-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP5]], 15 +// CHECK1-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP9]], 15 // CHECK1-NEXT: [[BF_CLEAR9:%.*]] = and i8 [[BF_LOAD8]], -16 // CHECK1-NEXT: [[BF_SET10:%.*]] = or i8 [[BF_CLEAR9]], [[BF_VALUE]] // CHECK1-NEXT: store i8 [[BF_SET10]], i8* [[B7]], align 4 @@ -1116,91 +1168,90 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP3:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[B4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[C5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[B]], i32** [[B_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[C_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[TMP1]], i32** [[TMP]], align 8 -// CHECK1-NEXT: store i32* [[TMP3]], i32** [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[A2]], align 4 -// CHECK1-NEXT: store i32* [[A2]], i32** [[_TMP3]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[B4]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[_TMP1]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[C5]], align 4 -// CHECK1-NEXT: store i32* [[C5]], i32** [[_TMP6]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP6]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[B4]], align 4 -// CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP8]], -1 -// CHECK1-NEXT: store i32 [[DEC]], i32* [[B4]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[_TMP6]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[DIV]], i32* [[TMP9]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i32* [[A2]] to i8* -// CHECK1-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i32* [[B4]] to i8* -// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast i32* [[C5]] to i8* -// CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]], i32 3, i64 24, i8* [[TMP19]], void (i8*, i8*)* @.omp.reduction.reduction_func.7, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 +// CHECK1-NEXT: store i32* [[TMP4]], i32** [[TMP]], align 8 +// CHECK1-NEXT: store i32* [[TMP8]], i32** [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK1-NEXT: store i32* [[A]], i32** [[_TMP2]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[B]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP1]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[C]], align 4 +// CHECK1-NEXT: store i32* [[C]], i32** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[B]], align 4 +// CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP13]], -1 +// CHECK1-NEXT: store i32 [[DEC]], i32* [[B]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[DIV]], i32* [[TMP14]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i32* [[A]] to i8* +// CHECK1-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP19:%.*]] = bitcast i32* [[B]] to i8* +// CHECK1-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[C]] to i8* +// CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]], i32 3, i64 24, i8* [[TMP24]], void (i8*, i8*)* @.omp.reduction.reduction_func.7, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[A2]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP4]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[B4]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[C5]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[B]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[C]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[A2]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = atomicrmw add i32* [[TMP4]], i32 [[TMP27]] monotonic, align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[B4]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP29]] monotonic, align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[C5]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = atomicrmw add i32* [[TMP5]], i32 [[TMP31]] monotonic, align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = atomicrmw add i32* [[TMP9]], i32 [[TMP32]] monotonic, align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[B]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = atomicrmw add i32* [[TMP6]], i32 [[TMP34]] monotonic, align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[C]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = atomicrmw add i32* [[TMP10]], i32 [[TMP36]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -1290,12 +1341,12 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR7]] align 2 { +// CHECK1-SAME: (%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR7]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.4*, align 8 +// CHECK1-NEXT: store %struct.S.4* [[THIS]], %struct.S.4** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.4*, %struct.S.4** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK1-NEXT: ret void // // @@ -1310,152 +1361,149 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR7]] align 2 { +// CHECK1-SAME: (%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR7]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.4*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.4* [[THIS]], %struct.S.4** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.4*, %struct.S.4** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR1:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[VAR1_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[T_VAR1_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[T_VAR2:%.*]] = alloca i32, align 128 -// CHECK1-NEXT: [[VAR3:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 -// CHECK1-NEXT: [[VAR14:%.*]] = alloca [[STRUCT_S_0]], align 128 -// CHECK1-NEXT: [[T_VAR15:%.*]] = alloca i32, align 128 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_4:%.*]], align 128 +// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S_4]], align 128 +// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x i8*], align 8 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_0]], align 4 -// CHECK1-NEXT: [[REF_TMP11:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_4]], align 4 +// CHECK1-NEXT: [[REF_TMP6:%.*]] = alloca [[STRUCT_S_4]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[VAR1]], %struct.S.0** [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR1]], i32** [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[T_VAR2]], align 128 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: store i32 2147483647, i32* [[T_VAR15]], align 128 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[T_VAR2]], align 128 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP0]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast %struct.S.0* [[ARRAYIDX6]] to i8* -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast %struct.S.0* [[VAR3]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP7]], i8* align 128 [[TMP8]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i32* [[T_VAR2]] to i8* -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast %struct.S.0* [[VAR3]] to i8* -// CHECK1-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[VAR14]] to i8* -// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast i32* [[T_VAR15]] to i8* -// CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = bitcast [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]], i32 4, i64 32, i8* [[TMP19]], void (i8*, i8*)* @.omp.reduction.reduction_func.9, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.4]*, [2 x %struct.S.4]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S.4*, %struct.S.4** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.S.4*, %struct.S.4** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP11]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 128 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: store i32 2147483647, i32* [[T_VAR1]], align 128 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[T_VAR]], align 128 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S.4], [2 x %struct.S.4]* [[TMP6]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast %struct.S.4* [[ARRAYIDX1]] to i8* +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast %struct.S.4* [[VAR]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP14]], i8* align 128 [[TMP15]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i32* [[T_VAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP19:%.*]] = bitcast %struct.S.4* [[VAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP21:%.*]] = bitcast %struct.S.4* [[VAR1]] to i8* +// CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i32* [[T_VAR1]] to i8* +// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]], i32 4, i64 32, i8* [[TMP26]], void (i8*, i8*)* @.omp.reduction.reduction_func.9, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP1]], align 128 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR2]], align 128 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP1]], align 128 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEanERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP3]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP3]] to i8* -// CHECK1-NEXT: [[TMP24:%.*]] = bitcast %struct.S.0* [[CALL]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP23]], i8* align 4 [[TMP24]], i64 4, i1 false) -// CHECK1-NEXT: [[CALL7:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP4]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[CALL7]], 0 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP4]], align 128 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[T_VAR]], align 128 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP4]], align 128 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.4* @_ZN1SIiEanERKS0_(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[TMP8]], %struct.S.4* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: [[TMP30:%.*]] = bitcast %struct.S.4* [[TMP8]] to i8* +// CHECK1-NEXT: [[TMP31:%.*]] = bitcast %struct.S.4* [[CALL]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP30]], i8* align 4 [[TMP31]], i64 4, i1 false) +// CHECK1-NEXT: [[CALL2:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[TMP10]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[CALL2]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] // CHECK1: land.rhs: -// CHECK1-NEXT: [[CALL8:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: [[TOBOOL9:%.*]] = icmp ne i32 [[CALL8]], 0 +// CHECK1-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL4:%.*]] = icmp ne i32 [[CALL3]], 0 // CHECK1-NEXT: br label [[LAND_END]] // CHECK1: land.end: -// CHECK1-NEXT: [[TMP25:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL9]], [[LAND_RHS]] ] -// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TMP25]] to i32 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], i32 noundef [[CONV]]) -// CHECK1-NEXT: [[TMP26:%.*]] = bitcast %struct.S.0* [[TMP4]] to i8* -// CHECK1-NEXT: [[TMP27:%.*]] = bitcast %struct.S.0* [[REF_TMP]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP26]], i8* align 4 [[TMP27]], i64 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP5]], align 128 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[T_VAR15]], align 128 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP28]], [[TMP29]] +// CHECK1-NEXT: [[TMP32:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL4]], [[LAND_RHS]] ] +// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TMP32]] to i32 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], i32 noundef [[CONV]]) +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast %struct.S.4* [[TMP10]] to i8* +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast %struct.S.4* [[REF_TMP]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP33]], i8* align 4 [[TMP34]], i64 4, i1 false) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR5]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP12]], align 128 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[T_VAR1]], align 128 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP35]], [[TMP36]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP5]], align 128 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP12]], align 128 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[T_VAR15]], align 128 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[T_VAR1]], align 128 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP30]], [[COND_TRUE]] ], [ [[TMP31]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], i32* [[TMP5]], align 128 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE]] ], [ [[TMP38]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], i32* [[TMP12]], align 128 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[T_VAR2]], align 128 -// CHECK1-NEXT: [[TMP33:%.*]] = atomicrmw add i32* [[TMP1]], i32 [[TMP32]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL10:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEanERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP3]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: [[TMP34:%.*]] = bitcast %struct.S.0* [[TMP3]] to i8* -// CHECK1-NEXT: [[TMP35:%.*]] = bitcast %struct.S.0* [[CALL10]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP34]], i8* align 4 [[TMP35]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL12:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP4]]) -// CHECK1-NEXT: [[TOBOOL13:%.*]] = icmp ne i32 [[CALL12]], 0 -// CHECK1-NEXT: br i1 [[TOBOOL13]], label [[LAND_RHS14:%.*]], label [[LAND_END17:%.*]] -// CHECK1: land.rhs14: -// CHECK1-NEXT: [[CALL15:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: [[TOBOOL16:%.*]] = icmp ne i32 [[CALL15]], 0 -// CHECK1-NEXT: br label [[LAND_END17]] -// CHECK1: land.end17: -// CHECK1-NEXT: [[TMP36:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL16]], [[LAND_RHS14]] ] -// CHECK1-NEXT: [[CONV18:%.*]] = zext i1 [[TMP36]] to i32 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[REF_TMP11]], i32 noundef [[CONV18]]) -// CHECK1-NEXT: [[TMP37:%.*]] = bitcast %struct.S.0* [[TMP4]] to i8* -// CHECK1-NEXT: [[TMP38:%.*]] = bitcast %struct.S.0* [[REF_TMP11]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP37]], i8* align 4 [[TMP38]], i64 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[REF_TMP11]]) #[[ATTR5]] -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[T_VAR15]], align 128 -// CHECK1-NEXT: [[TMP40:%.*]] = atomicrmw min i32* [[TMP5]], i32 [[TMP39]] monotonic, align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[T_VAR]], align 128 +// CHECK1-NEXT: [[TMP40:%.*]] = atomicrmw add i32* [[TMP4]], i32 [[TMP39]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL5:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.4* @_ZN1SIiEanERKS0_(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[TMP8]], %struct.S.4* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: [[TMP41:%.*]] = bitcast %struct.S.4* [[TMP8]] to i8* +// CHECK1-NEXT: [[TMP42:%.*]] = bitcast %struct.S.4* [[CALL5]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP41]], i8* align 4 [[TMP42]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL7:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[TMP10]]) +// CHECK1-NEXT: [[TOBOOL8:%.*]] = icmp ne i32 [[CALL7]], 0 +// CHECK1-NEXT: br i1 [[TOBOOL8]], label [[LAND_RHS9:%.*]], label [[LAND_END12:%.*]] +// CHECK1: land.rhs9: +// CHECK1-NEXT: [[CALL10:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL11:%.*]] = icmp ne i32 [[CALL10]], 0 +// CHECK1-NEXT: br label [[LAND_END12]] +// CHECK1: land.end12: +// CHECK1-NEXT: [[TMP43:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL11]], [[LAND_RHS9]] ] +// CHECK1-NEXT: [[CONV13:%.*]] = zext i1 [[TMP43]] to i32 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[REF_TMP6]], i32 noundef [[CONV13]]) +// CHECK1-NEXT: [[TMP44:%.*]] = bitcast %struct.S.4* [[TMP10]] to i8* +// CHECK1-NEXT: [[TMP45:%.*]] = bitcast %struct.S.4* [[REF_TMP6]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP44]], i8* align 4 [[TMP45]], i64 4, i1 false) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR5]] +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[T_VAR1]], align 128 +// CHECK1-NEXT: [[TMP47:%.*]] = atomicrmw min i32* [[TMP12]], i32 [[TMP46]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR14]]) #[[ATTR5]] -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR3]]) #[[ATTR5]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR5]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] // CHECK1-NEXT: ret void // // @@ -1464,7 +1512,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_4:%.*]], align 4 // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8 @@ -1479,16 +1527,16 @@ // CHECK1-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32* // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[TMP5]], i64 0, i64 1 // CHECK1-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to %struct.S.0* +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to %struct.S.4* // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[TMP3]], i64 0, i64 1 // CHECK1-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to %struct.S.0* +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to %struct.S.4* // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[TMP5]], i64 0, i64 2 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to %struct.S.0* +// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to %struct.S.4* // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[TMP3]], i64 0, i64 2 // CHECK1-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i8* [[TMP22]] to %struct.S.0* +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i8* [[TMP22]] to %struct.S.4* // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[TMP5]], i64 0, i64 3 // CHECK1-NEXT: [[TMP25:%.*]] = load i8*, i8** [[TMP24]], align 8 // CHECK1-NEXT: [[TMP26:%.*]] = bitcast i8* [[TMP25]] to i32* @@ -1499,25 +1547,25 @@ // CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP8]], align 128 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP11]], align 128 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEanERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP17]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP14]]) -// CHECK1-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[TMP17]] to i8* -// CHECK1-NEXT: [[TMP33:%.*]] = bitcast %struct.S.0* [[CALL]] to i8* +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.4* @_ZN1SIiEanERKS0_(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[TMP17]], %struct.S.4* noundef nonnull align 4 dereferenceable(4) [[TMP14]]) +// CHECK1-NEXT: [[TMP32:%.*]] = bitcast %struct.S.4* [[TMP17]] to i8* +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast %struct.S.4* [[CALL]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP32]], i8* align 4 [[TMP33]], i64 4, i1 false) -// CHECK1-NEXT: [[CALL2:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP23]]) +// CHECK1-NEXT: [[CALL2:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[TMP23]]) // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[CALL2]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] // CHECK1: land.rhs: -// CHECK1-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP20]]) +// CHECK1-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[TMP20]]) // CHECK1-NEXT: [[TOBOOL4:%.*]] = icmp ne i32 [[CALL3]], 0 // CHECK1-NEXT: br label [[LAND_END]] // CHECK1: land.end: // CHECK1-NEXT: [[TMP34:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[TOBOOL4]], [[LAND_RHS]] ] // CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TMP34]] to i32 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], i32 noundef [[CONV]]) -// CHECK1-NEXT: [[TMP35:%.*]] = bitcast %struct.S.0* [[TMP23]] to i8* -// CHECK1-NEXT: [[TMP36:%.*]] = bitcast %struct.S.0* [[REF_TMP]] to i8* +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], i32 noundef [[CONV]]) +// CHECK1-NEXT: [[TMP35:%.*]] = bitcast %struct.S.4* [[TMP23]] to i8* +// CHECK1-NEXT: [[TMP36:%.*]] = bitcast %struct.S.4* [[REF_TMP]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP35]], i8* align 4 [[TMP36]], i64 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR5]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP29]], align 128 // CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP26]], align 128 // CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP37]], [[TMP38]] @@ -1535,42 +1583,42 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEanERKS0_ -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR0]] align 2 { +// CHECK1-SAME: (%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.4* noundef nonnull align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR0]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP0]], %struct.S.0** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: ret %struct.S.0* [[THIS1]] +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.4*, align 8 +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca %struct.S.4*, align 8 +// CHECK1-NEXT: store %struct.S.4* [[THIS]], %struct.S.4** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.4* [[TMP0]], %struct.S.4** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.4*, %struct.S.4** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: ret %struct.S.4* [[THIS1]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEcviEv -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) #[[ATTR0]] align 2 { +// CHECK1-SAME: (%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) #[[ATTR0]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.4*, align 8 +// CHECK1-NEXT: store %struct.S.4* [[THIS]], %struct.S.4** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.4*, %struct.S.4** [[THIS_ADDR]], align 8 // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR7]] align 2 { +// CHECK1-SAME: (%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR7]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR5]] +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.4*, align 8 +// CHECK1-NEXT: store %struct.S.4* [[THIS]], %struct.S.4** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.4*, %struct.S.4** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR5]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR7]] align 2 { +// CHECK1-SAME: (%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR7]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.4*, align 8 +// CHECK1-NEXT: store %struct.S.4* [[THIS]], %struct.S.4** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.4*, %struct.S.4** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_4:%.*]], %struct.S.4* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load volatile i32, i32* @g, align 128 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK1-NEXT: ret void @@ -1581,78 +1629,84 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8 // CHECK1-NEXT: [[A2:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], %struct.SST* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, i32* [[A]], align 4 // CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SST]], %struct.SST* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32* [[A3]], i32** [[A2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A2]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SST*, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.SST* [[THIS1]], i32* [[TMP0]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SST* [[THIS1]], %struct.SST** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A2]], align 8 +// CHECK1-NEXT: store i32* [[TMP2]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SST* noundef [[THIS:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[TMP1]], i32** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK1-NEXT: store i32 1, i32* [[A1]], align 4 -// CHECK1-NEXT: store i32* [[A1]], i32** [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i32* [[A1]] to i8* -// CHECK1-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 1, i64 8, i8* [[TMP9]], void (i8*, i8*)* @.omp.reduction.reduction_func.11, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SST*, %struct.SST** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK1-NEXT: store i32* [[TMP4]], i32** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK1-NEXT: store i32 1, i32* [[A]], align 4 +// CHECK1-NEXT: store i32* [[A]], i32** [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i32* [[A]] to i8* +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 1, i64 8, i8* [[TMP12]], void (i8*, i8*)* @.omp.reduction.reduction_func.11, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[MUL]], i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: store i32 [[MUL]], i32* [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP2]] monotonic, align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP5]] monotonic, align 4 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP14:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP19:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i32 [[TMP14]], i32* [[_TMP3]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[_TMP3]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: store i32 [[MUL4]], i32* [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP14]], i32 [[TMP17]] monotonic monotonic, align 4 -// CHECK1-NEXT: [[TMP19]] = extractvalue { i32, i1 } [[TMP18]], 0 -// CHECK1-NEXT: [[TMP20:%.*]] = extractvalue { i32, i1 } [[TMP18]], 1 -// CHECK1-NEXT: br i1 [[TMP20]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP17:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP22:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i32 [[TMP17]], i32* [[_TMP2]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[_TMP2]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: store i32 [[MUL3]], i32* [[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = cmpxchg i32* [[TMP5]], i32 [[TMP17]], i32 [[TMP20]] monotonic monotonic, align 4 +// CHECK1-NEXT: [[TMP22]] = extractvalue { i32, i1 } [[TMP21]], 0 +// CHECK1-NEXT: [[TMP23:%.*]] = extractvalue { i32, i1 } [[TMP21]], 1 +// CHECK1-NEXT: br i1 [[TMP23]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: @@ -1684,14 +1738,14 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR7]] align 2 { +// CHECK1-SAME: (%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR7]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.4*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.4* [[THIS]], %struct.S.4** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.4*, %struct.S.4** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_4:%.*]], %struct.S.4* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load volatile i32, i32* @g, align 128 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1700,11 +1754,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR7]] align 2 { +// CHECK1-SAME: (%struct.S.4* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR7]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.4*, align 8 +// CHECK1-NEXT: store %struct.S.4* [[THIS]], %struct.S.4** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.4*, %struct.S.4** [[THIS_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -1712,18 +1766,21 @@ // CHECK3-SAME: (i16* noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[X_ADDR:%.*]] = alloca i16*, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK3-NEXT: store i16* [[X]], i16** [[X_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load i16*, i16** [[X_ADDR]], align 8 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i16*)* @.omp_outlined. to void (i32*, i32*, ...)*), i16* [[TMP0]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16*, i16** [[X_ADDR]], align 8 +// CHECK3-NEXT: store i16* [[TMP1]], i16** [[TMP0]], align 8 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef [[X:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[X_ADDR:%.*]] = alloca i16*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK3-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 // CHECK3-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i16*, align 8 @@ -1732,109 +1789,111 @@ // CHECK3-NEXT: [[_TMP13:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i16* [[X]], i16** [[X_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load i16*, i16** [[X_ADDR]], align 8 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP0]], i64 0 -// CHECK3-NEXT: [[TMP1:%.*]] = load i16*, i16** [[X_ADDR]], align 8 -// CHECK3-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[TMP1]], i64 0 -// CHECK3-NEXT: [[TMP2:%.*]] = ptrtoint i16* [[ARRAYIDX1]] to i64 -// CHECK3-NEXT: [[TMP3:%.*]] = ptrtoint i16* [[ARRAYIDX]] to i64 -// CHECK3-NEXT: [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]] -// CHECK3-NEXT: [[TMP5:%.*]] = sdiv exact i64 [[TMP4]], ptrtoint (i16* getelementptr (i16, i16* null, i32 1) to i64) -// CHECK3-NEXT: [[TMP6:%.*]] = add nuw i64 [[TMP5]], 1 -// CHECK3-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], ptrtoint (i16* getelementptr (i16, i16* null, i32 1) to i64) -// CHECK3-NEXT: [[TMP8:%.*]] = call i8* @llvm.stacksave() -// CHECK3-NEXT: store i8* [[TMP8]], i8** [[SAVED_STACK]], align 8 -// CHECK3-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP6]], align 16 -// CHECK3-NEXT: store i64 [[TMP6]], i64* [[__VLA_EXPR0]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr i16, i16* [[VLA]], i64 [[TMP6]] -// CHECK3-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[VLA]], [[TMP9]] +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16*, i16** [[TMP1]], align 8 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP2]], i64 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16*, i16** [[TMP1]], align 8 +// CHECK3-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 0 +// CHECK3-NEXT: [[TMP4:%.*]] = ptrtoint i16* [[ARRAYIDX1]] to i64 +// CHECK3-NEXT: [[TMP5:%.*]] = ptrtoint i16* [[ARRAYIDX]] to i64 +// CHECK3-NEXT: [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]] +// CHECK3-NEXT: [[TMP7:%.*]] = sdiv exact i64 [[TMP6]], ptrtoint (i16* getelementptr (i16, i16* null, i32 1) to i64) +// CHECK3-NEXT: [[TMP8:%.*]] = add nuw i64 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], ptrtoint (i16* getelementptr (i16, i16* null, i32 1) to i64) +// CHECK3-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave() +// CHECK3-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8 +// CHECK3-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP8]], align 16 +// CHECK3-NEXT: store i64 [[TMP8]], i64* [[__VLA_EXPR0]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr i16, i16* [[VLA]], i64 [[TMP8]] +// CHECK3-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[VLA]], [[TMP11]] // CHECK3-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK3: omp.arrayinit.body: // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK3-NEXT: store i16 0, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2 // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK3: omp.arrayinit.done: -// CHECK3-NEXT: [[TMP10:%.*]] = load i16*, i16** [[X_ADDR]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = ptrtoint i16* [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP12:%.*]] = ptrtoint i16* [[ARRAYIDX]] to i64 -// CHECK3-NEXT: [[TMP13:%.*]] = sub i64 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: [[TMP14:%.*]] = sdiv exact i64 [[TMP13]], ptrtoint (i16* getelementptr (i16, i16* null, i32 1) to i64) -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr i16, i16* [[VLA]], i64 [[TMP14]] -// CHECK3-NEXT: store i16* [[TMP15]], i16** [[TMP]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP17:%.*]] = bitcast i16* [[VLA]] to i8* -// CHECK3-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK3-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP6]] to i8* +// CHECK3-NEXT: [[TMP12:%.*]] = load i16*, i16** [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = ptrtoint i16* [[TMP12]] to i64 +// CHECK3-NEXT: [[TMP14:%.*]] = ptrtoint i16* [[ARRAYIDX]] to i64 +// CHECK3-NEXT: [[TMP15:%.*]] = sub i64 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP16:%.*]] = sdiv exact i64 [[TMP15]], ptrtoint (i16* getelementptr (i16, i16* null, i32 1) to i64) +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr i16, i16* [[VLA]], i64 [[TMP16]] +// CHECK3-NEXT: store i16* [[TMP17]], i16** [[TMP]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK3-NEXT: [[TMP19:%.*]] = bitcast i16* [[VLA]] to i8* // CHECK3-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 8 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP21]], i32 1, i64 16, i8* [[TMP22]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK3-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP8]] to i8* +// CHECK3-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP23]], i32 1, i64 16, i8* [[TMP24]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr i16, i16* [[ARRAYIDX]], i64 [[TMP6]] -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i16* [[ARRAYIDX]], [[TMP24]] +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr i16, i16* [[ARRAYIDX]], i64 [[TMP8]] +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i16* [[ARRAYIDX]], [[TMP26]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: // CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i16* [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST2:%.*]] = phi i16* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT5:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK3-NEXT: [[TMP25:%.*]] = load i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST2]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP25]] to i32 -// CHECK3-NEXT: [[TMP26:%.*]] = load i16, i16* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 2 -// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP26]] to i32 +// CHECK3-NEXT: [[TMP27:%.*]] = load i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST2]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP27]] to i32 +// CHECK3-NEXT: [[TMP28:%.*]] = load i16, i16* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 2 +// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP28]] to i32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV3]] // CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD]] to i16 // CHECK3-NEXT: store i16 [[CONV4]], i16* [[OMP_ARRAYCPY_DESTELEMENTPAST2]], align 2 // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT5]] = getelementptr i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST2]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i16, i16* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE6:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP24]] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE6:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP26]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] // CHECK3: omp.arraycpy.done7: -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr i16, i16* [[ARRAYIDX]], i64 [[TMP6]] -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY8:%.*]] = icmp eq i16* [[ARRAYIDX]], [[TMP27]] +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr i16, i16* [[ARRAYIDX]], i64 [[TMP8]] +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY8:%.*]] = icmp eq i16* [[ARRAYIDX]], [[TMP29]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY8]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY9:%.*]] // CHECK3: omp.arraycpy.body9: // CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST10:%.*]] = phi i16* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT19:%.*]], [[ATOMIC_EXIT:%.*]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST11:%.*]] = phi i16* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT18:%.*]], [[ATOMIC_EXIT]] ] -// CHECK3-NEXT: [[TMP28:%.*]] = load i16, i16* [[OMP_ARRAYCPY_SRCELEMENTPAST10]], align 2 -// CHECK3-NEXT: [[CONV12:%.*]] = sext i16 [[TMP28]] to i32 +// CHECK3-NEXT: [[TMP30:%.*]] = load i16, i16* [[OMP_ARRAYCPY_SRCELEMENTPAST10]], align 2 +// CHECK3-NEXT: [[CONV12:%.*]] = sext i16 [[TMP30]] to i32 // CHECK3-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST11]] monotonic, align 2 // CHECK3-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK3: atomic_cont: -// CHECK3-NEXT: [[TMP29:%.*]] = phi i16 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY9]] ], [ [[TMP34:%.*]], [[ATOMIC_CONT]] ] -// CHECK3-NEXT: store i16 [[TMP29]], i16* [[_TMP13]], align 2 -// CHECK3-NEXT: [[TMP30:%.*]] = load i16, i16* [[_TMP13]], align 2 -// CHECK3-NEXT: [[CONV14:%.*]] = sext i16 [[TMP30]] to i32 -// CHECK3-NEXT: [[TMP31:%.*]] = load i16, i16* [[OMP_ARRAYCPY_SRCELEMENTPAST10]], align 2 -// CHECK3-NEXT: [[CONV15:%.*]] = sext i16 [[TMP31]] to i32 +// CHECK3-NEXT: [[TMP31:%.*]] = phi i16 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY9]] ], [ [[TMP36:%.*]], [[ATOMIC_CONT]] ] +// CHECK3-NEXT: store i16 [[TMP31]], i16* [[_TMP13]], align 2 +// CHECK3-NEXT: [[TMP32:%.*]] = load i16, i16* [[_TMP13]], align 2 +// CHECK3-NEXT: [[CONV14:%.*]] = sext i16 [[TMP32]] to i32 +// CHECK3-NEXT: [[TMP33:%.*]] = load i16, i16* [[OMP_ARRAYCPY_SRCELEMENTPAST10]], align 2 +// CHECK3-NEXT: [[CONV15:%.*]] = sext i16 [[TMP33]] to i32 // CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV14]], [[CONV15]] // CHECK3-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i16 // CHECK3-NEXT: store i16 [[CONV17]], i16* [[ATOMIC_TEMP]], align 2 -// CHECK3-NEXT: [[TMP32:%.*]] = load i16, i16* [[ATOMIC_TEMP]], align 2 -// CHECK3-NEXT: [[TMP33:%.*]] = cmpxchg i16* [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i16 [[TMP29]], i16 [[TMP32]] monotonic monotonic, align 2 -// CHECK3-NEXT: [[TMP34]] = extractvalue { i16, i1 } [[TMP33]], 0 -// CHECK3-NEXT: [[TMP35:%.*]] = extractvalue { i16, i1 } [[TMP33]], 1 -// CHECK3-NEXT: br i1 [[TMP35]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK3-NEXT: [[TMP34:%.*]] = load i16, i16* [[ATOMIC_TEMP]], align 2 +// CHECK3-NEXT: [[TMP35:%.*]] = cmpxchg i16* [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i16 [[TMP31]], i16 [[TMP34]] monotonic monotonic, align 2 +// CHECK3-NEXT: [[TMP36]] = extractvalue { i16, i1 } [[TMP35]], 0 +// CHECK3-NEXT: [[TMP37:%.*]] = extractvalue { i16, i1 } [[TMP35]], 1 +// CHECK3-NEXT: br i1 [[TMP37]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK3: atomic_exit: // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT18]] = getelementptr i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT19]] = getelementptr i16, i16* [[OMP_ARRAYCPY_SRCELEMENTPAST10]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT18]], [[TMP27]] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT18]], [[TMP29]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY9]] // CHECK3: omp.arraycpy.done21: // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: -// CHECK3-NEXT: [[TMP36:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK3-NEXT: call void @llvm.stackrestore(i8* [[TMP36]]) +// CHECK3-NEXT: [[TMP38:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK3-NEXT: call void @llvm.stackrestore(i8* [[TMP38]]) // CHECK3-NEXT: ret void // // @@ -1912,6 +1971,7 @@ // CHECK3-NEXT: [[A2:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C5:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK3-NEXT: store i32* [[D]], i32** [[D_ADDR]], align 8 // CHECK3-NEXT: [[THIS1:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 @@ -1930,14 +1990,22 @@ // CHECK3-NEXT: [[C6:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 2 // CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[C6]], align 8 // CHECK3-NEXT: store i32* [[TMP1]], i32** [[C5]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A2]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[C5]], align 8 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.SS* [[THIS1]], i32* [[TMP2]], i32* [[B4]], i32* [[TMP3]]) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[B4]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[THIS1]], %struct.SS** [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A2]], align 8 +// CHECK3-NEXT: store i32* [[TMP4]], i32** [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[B4]], i32** [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[C5]], align 8 +// CHECK3-NEXT: store i32* [[TMP7]], i32** [[TMP6]], align 8 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[B4]], align 4 // CHECK3-NEXT: [[B7:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 +// CHECK3-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i8 // CHECK3-NEXT: [[BF_LOAD8:%.*]] = load i8, i8* [[B7]], align 4 -// CHECK3-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP5]], 15 +// CHECK3-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP9]], 15 // CHECK3-NEXT: [[BF_CLEAR9:%.*]] = and i8 [[BF_LOAD8]], -16 // CHECK3-NEXT: [[BF_SET10:%.*]] = or i8 [[BF_CLEAR9]], [[BF_VALUE]] // CHECK3-NEXT: store i8 [[BF_SET10]], i8* [[B7]], align 4 @@ -1945,127 +2013,135 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[A2:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP3:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[B4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP6:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x i8*], align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK3-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK3-NEXT: store i32* [[B]], i32** [[B_ADDR]], align 8 -// CHECK3-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[B_ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[C_ADDR]], align 8 -// CHECK3-NEXT: store i32* [[TMP1]], i32** [[TMP]], align 8 -// CHECK3-NEXT: store i32* [[TMP3]], i32** [[_TMP1]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK3-NEXT: store i32 0, i32* [[A2]], align 4 -// CHECK3-NEXT: store i32* [[A2]], i32** [[_TMP3]], align 8 -// CHECK3-NEXT: store i32 0, i32* [[B4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[_TMP1]], align 8 -// CHECK3-NEXT: store i32 0, i32* [[C5]], align 4 -// CHECK3-NEXT: store i32* [[C5]], i32** [[_TMP6]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP6]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK3-NEXT: store i32* [[TMP8]], i32** [[TMP7]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store i32* [[B4]], i32** [[TMP9]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[_TMP6]], align 8 -// CHECK3-NEXT: store i32* [[TMP11]], i32** [[TMP10]], align 8 -// CHECK3-NEXT: call void @_ZZN2SSC1ERiENKUlvE_clEv(%class.anon.0* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP13:%.*]] = bitcast i32* [[A2]] to i8* -// CHECK3-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK3-NEXT: [[TMP15:%.*]] = bitcast i32* [[B4]] to i8* -// CHECK3-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK3-NEXT: [[TMP17:%.*]] = bitcast i32* [[C5]] to i8* -// CHECK3-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]], i32 3, i64 24, i8* [[TMP20]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 +// CHECK3-NEXT: store i32* [[TMP4]], i32** [[TMP]], align 8 +// CHECK3-NEXT: store i32* [[TMP8]], i32** [[_TMP1]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK3-NEXT: store i32* [[A]], i32** [[_TMP2]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[B]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP1]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[C]], align 4 +// CHECK3-NEXT: store i32* [[C]], i32** [[_TMP3]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP11]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK3-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[B]], i32** [[TMP14]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK3-NEXT: store i32* [[TMP16]], i32** [[TMP15]], align 8 +// CHECK3-NEXT: call void @_ZZN2SSC1ERiENKUlvE_clEv(%class.anon.1* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK3-NEXT: [[TMP18:%.*]] = bitcast i32* [[A]] to i8* +// CHECK3-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 8 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK3-NEXT: [[TMP20:%.*]] = bitcast i32* [[B]] to i8* +// CHECK3-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK3-NEXT: [[TMP22:%.*]] = bitcast i32* [[C]] to i8* +// CHECK3-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 8 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]], i32 3, i64 24, i8* [[TMP25]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP26]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[A2]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[B4]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[C5]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[B]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[C]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK3-NEXT: store i32 [[ADD5]], i32* [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[A2]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = atomicrmw add i32* [[TMP4]], i32 [[TMP28]] monotonic, align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[B4]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP30]] monotonic, align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[C5]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = atomicrmw add i32* [[TMP5]], i32 [[TMP32]] monotonic, align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = atomicrmw add i32* [[TMP9]], i32 [[TMP33]] monotonic, align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[B]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = atomicrmw add i32* [[TMP6]], i32 [[TMP35]] monotonic, align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[C]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = atomicrmw add i32* [[TMP10]], i32 [[TMP37]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZZN2SSC1ERiENKUlvE_clEv -// CHECK3-SAME: (%class.anon.0* noundef nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR0]] align 2 { +// CHECK3-SAME: (%class.anon.1* noundef nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR0]] align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon.0*, align 8 -// CHECK3-NEXT: store %class.anon.0* [[THIS]], %class.anon.0** [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[THIS1:%.*]] = load %class.anon.0*, %class.anon.0** [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], %class.anon.0* [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon.1*, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK3-NEXT: store %class.anon.1* [[THIS]], %class.anon.1** [[THIS_ADDR]], align 8 +// CHECK3-NEXT: [[THIS1:%.*]] = load %class.anon.1*, %class.anon.1** [[THIS_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1:%.*]], %class.anon.1* [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP1:%.*]] = load %struct.SS*, %struct.SS** [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 1 // CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 // CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 // CHECK3-NEXT: store i32 [[INC]], i32* [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 2 // CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 // CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP7]], -1 // CHECK3-NEXT: store i32 [[DEC]], i32* [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 3 // CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[TMP8]], align 8 // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[DIV]], i32* [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP11]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP1]], %struct.SS** [[TMP11]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 1 // CHECK3-NEXT: [[TMP14:%.*]] = load i32*, i32** [[TMP13]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[TMP15]], align 8 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.SS* [[TMP1]], i32* [[TMP12]], i32* [[TMP14]], i32* [[TMP16]]) +// CHECK3-NEXT: store i32* [[TMP14]], i32** [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[TMP16]], align 8 +// CHECK3-NEXT: store i32* [[TMP17]], i32** [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP19]], align 8 +// CHECK3-NEXT: store i32* [[TMP20]], i32** [[TMP18]], align 8 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // @@ -2114,91 +2190,90 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[A2:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP3:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[B4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP6:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x i8*], align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK3-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK3-NEXT: store i32* [[B]], i32** [[B_ADDR]], align 8 -// CHECK3-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[B_ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[C_ADDR]], align 8 -// CHECK3-NEXT: store i32* [[TMP1]], i32** [[TMP]], align 8 -// CHECK3-NEXT: store i32* [[TMP3]], i32** [[_TMP1]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK3-NEXT: store i32 -1, i32* [[A2]], align 4 -// CHECK3-NEXT: store i32* [[A2]], i32** [[_TMP3]], align 8 -// CHECK3-NEXT: store i32 -1, i32* [[B4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[_TMP1]], align 8 -// CHECK3-NEXT: store i32 -1, i32* [[C5]], align 4 -// CHECK3-NEXT: store i32* [[C5]], i32** [[_TMP6]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK3-NEXT: store i32 [[INC]], i32* [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[B4]], align 4 -// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP8]], -1 -// CHECK3-NEXT: store i32 [[DEC]], i32* [[B4]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[_TMP6]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[DIV]], i32* [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP12:%.*]] = bitcast i32* [[A2]] to i8* -// CHECK3-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK3-NEXT: [[TMP14:%.*]] = bitcast i32* [[B4]] to i8* -// CHECK3-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK3-NEXT: [[TMP16:%.*]] = bitcast i32* [[C5]] to i8* -// CHECK3-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]], i32 3, i64 24, i8* [[TMP19]], void (i8*, i8*)* @.omp.reduction.reduction_func.4, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 +// CHECK3-NEXT: store i32* [[TMP4]], i32** [[TMP]], align 8 +// CHECK3-NEXT: store i32* [[TMP8]], i32** [[_TMP1]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK3-NEXT: store i32 -1, i32* [[A]], align 4 +// CHECK3-NEXT: store i32* [[A]], i32** [[_TMP2]], align 8 +// CHECK3-NEXT: store i32 -1, i32* [[B]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP1]], align 8 +// CHECK3-NEXT: store i32 -1, i32* [[C]], align 4 +// CHECK3-NEXT: store i32* [[C]], i32** [[_TMP3]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: store i32 [[INC]], i32* [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[B]], align 4 +// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP13]], -1 +// CHECK3-NEXT: store i32 [[DEC]], i32* [[B]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP15]], 1 +// CHECK3-NEXT: store i32 [[DIV]], i32* [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK3-NEXT: [[TMP17:%.*]] = bitcast i32* [[A]] to i8* +// CHECK3-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK3-NEXT: [[TMP19:%.*]] = bitcast i32* [[B]] to i8* +// CHECK3-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK3-NEXT: [[TMP21:%.*]] = bitcast i32* [[C]] to i8* +// CHECK3-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]], i32 3, i64 24, i8* [[TMP24]], void (i8*, i8*)* @.omp.reduction.reduction_func.4, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[A2]], align 4 -// CHECK3-NEXT: [[AND:%.*]] = and i32 [[TMP21]], [[TMP22]] -// CHECK3-NEXT: store i32 [[AND]], i32* [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[B4]], align 4 -// CHECK3-NEXT: [[AND7:%.*]] = and i32 [[TMP23]], [[TMP24]] -// CHECK3-NEXT: store i32 [[AND7]], i32* [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[C5]], align 4 -// CHECK3-NEXT: [[AND8:%.*]] = and i32 [[TMP25]], [[TMP26]] -// CHECK3-NEXT: store i32 [[AND8]], i32* [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: [[AND:%.*]] = and i32 [[TMP26]], [[TMP27]] +// CHECK3-NEXT: store i32 [[AND]], i32* [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[B]], align 4 +// CHECK3-NEXT: [[AND4:%.*]] = and i32 [[TMP28]], [[TMP29]] +// CHECK3-NEXT: store i32 [[AND4]], i32* [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[C]], align 4 +// CHECK3-NEXT: [[AND5:%.*]] = and i32 [[TMP30]], [[TMP31]] +// CHECK3-NEXT: store i32 [[AND5]], i32* [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[A2]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = atomicrmw and i32* [[TMP4]], i32 [[TMP27]] monotonic, align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[B4]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = atomicrmw and i32* [[TMP2]], i32 [[TMP29]] monotonic, align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[C5]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = atomicrmw and i32* [[TMP5]], i32 [[TMP31]] monotonic, align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = atomicrmw and i32* [[TMP9]], i32 [[TMP32]] monotonic, align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[B]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = atomicrmw and i32* [[TMP6]], i32 [[TMP34]] monotonic, align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[C]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = atomicrmw and i32* [[TMP10]], i32 [[TMP36]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -2249,44 +2324,46 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[G:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[G1:%.*]] = alloca i32, align 128 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK3-NEXT: [[G:%.*]] = alloca i32, align 128 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_4:%.*]], align 8 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[G_ADDR]], align 8 -// CHECK3-NEXT: store i32 0, i32* [[G1]], align 128 -// CHECK3-NEXT: store i32 1, i32* [[G1]], align 128 -// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store i32* [[G1]], i32** [[TMP1]], align 8 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP3:%.*]] = bitcast i32* [[G1]] to i8* -// CHECK3-NEXT: store i8* [[TMP3]], i8** [[TMP2]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1, i64 8, i8* [[TMP6]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP7]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[G]], align 128 +// CHECK3-NEXT: store i32 1, i32* [[G]], align 128 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[G]], i32** [[TMP3]], align 8 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.4* noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i32* [[G]] to i8* +// CHECK3-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 1, i64 8, i8* [[TMP8]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP0]], align 128 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[G1]], align 128 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 128 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP2]], align 128 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[G]], align 128 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[TMP2]], align 128 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[G1]], align 128 -// CHECK3-NEXT: [[TMP11:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP10]] monotonic, align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[G]], align 128 +// CHECK3-NEXT: [[TMP13:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP12]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -2320,18 +2397,21 @@ // CHECK4-SAME: (i16* noundef [[X:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[X_ADDR:%.*]] = alloca i16*, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK4-NEXT: store i16* [[X]], i16** [[X_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load i16*, i16** [[X_ADDR]], align 8 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i16*)* @.omp_outlined. to void (i32*, i32*, ...)*), i16* [[TMP0]]) +// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP1:%.*]] = load i16*, i16** [[X_ADDR]], align 8 +// CHECK4-NEXT: store i16* [[TMP1]], i16** [[TMP0]], align 8 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef [[X:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[X_ADDR:%.*]] = alloca i16*, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK4-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 // CHECK4-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca i16*, align 8 @@ -2340,109 +2420,111 @@ // CHECK4-NEXT: [[_TMP13:%.*]] = alloca i16, align 2 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store i16* [[X]], i16** [[X_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load i16*, i16** [[X_ADDR]], align 8 -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP0]], i64 0 -// CHECK4-NEXT: [[TMP1:%.*]] = load i16*, i16** [[X_ADDR]], align 8 -// CHECK4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[TMP1]], i64 0 -// CHECK4-NEXT: [[TMP2:%.*]] = ptrtoint i16* [[ARRAYIDX1]] to i64 -// CHECK4-NEXT: [[TMP3:%.*]] = ptrtoint i16* [[ARRAYIDX]] to i64 -// CHECK4-NEXT: [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]] -// CHECK4-NEXT: [[TMP5:%.*]] = sdiv exact i64 [[TMP4]], ptrtoint (i16* getelementptr (i16, i16* null, i32 1) to i64) -// CHECK4-NEXT: [[TMP6:%.*]] = add nuw i64 [[TMP5]], 1 -// CHECK4-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], ptrtoint (i16* getelementptr (i16, i16* null, i32 1) to i64) -// CHECK4-NEXT: [[TMP8:%.*]] = call i8* @llvm.stacksave() -// CHECK4-NEXT: store i8* [[TMP8]], i8** [[SAVED_STACK]], align 8 -// CHECK4-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP6]], align 16 -// CHECK4-NEXT: store i64 [[TMP6]], i64* [[__VLA_EXPR0]], align 8 -// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr i16, i16* [[VLA]], i64 [[TMP6]] -// CHECK4-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[VLA]], [[TMP9]] +// CHECK4-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load i16*, i16** [[TMP1]], align 8 +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP2]], i64 0 +// CHECK4-NEXT: [[TMP3:%.*]] = load i16*, i16** [[TMP1]], align 8 +// CHECK4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 0 +// CHECK4-NEXT: [[TMP4:%.*]] = ptrtoint i16* [[ARRAYIDX1]] to i64 +// CHECK4-NEXT: [[TMP5:%.*]] = ptrtoint i16* [[ARRAYIDX]] to i64 +// CHECK4-NEXT: [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]] +// CHECK4-NEXT: [[TMP7:%.*]] = sdiv exact i64 [[TMP6]], ptrtoint (i16* getelementptr (i16, i16* null, i32 1) to i64) +// CHECK4-NEXT: [[TMP8:%.*]] = add nuw i64 [[TMP7]], 1 +// CHECK4-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], ptrtoint (i16* getelementptr (i16, i16* null, i32 1) to i64) +// CHECK4-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave() +// CHECK4-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8 +// CHECK4-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP8]], align 16 +// CHECK4-NEXT: store i64 [[TMP8]], i64* [[__VLA_EXPR0]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr i16, i16* [[VLA]], i64 [[TMP8]] +// CHECK4-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[VLA]], [[TMP11]] // CHECK4-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK4: omp.arrayinit.body: // CHECK4-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK4-NEXT: store i16 0, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2 // CHECK4-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK4-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK4-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] // CHECK4-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK4: omp.arrayinit.done: -// CHECK4-NEXT: [[TMP10:%.*]] = load i16*, i16** [[X_ADDR]], align 8 -// CHECK4-NEXT: [[TMP11:%.*]] = ptrtoint i16* [[TMP10]] to i64 -// CHECK4-NEXT: [[TMP12:%.*]] = ptrtoint i16* [[ARRAYIDX]] to i64 -// CHECK4-NEXT: [[TMP13:%.*]] = sub i64 [[TMP11]], [[TMP12]] -// CHECK4-NEXT: [[TMP14:%.*]] = sdiv exact i64 [[TMP13]], ptrtoint (i16* getelementptr (i16, i16* null, i32 1) to i64) -// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr i16, i16* [[VLA]], i64 [[TMP14]] -// CHECK4-NEXT: store i16* [[TMP15]], i16** [[TMP]], align 8 -// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK4-NEXT: [[TMP17:%.*]] = bitcast i16* [[VLA]] to i8* -// CHECK4-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8 -// CHECK4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK4-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP6]] to i8* +// CHECK4-NEXT: [[TMP12:%.*]] = load i16*, i16** [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP13:%.*]] = ptrtoint i16* [[TMP12]] to i64 +// CHECK4-NEXT: [[TMP14:%.*]] = ptrtoint i16* [[ARRAYIDX]] to i64 +// CHECK4-NEXT: [[TMP15:%.*]] = sub i64 [[TMP13]], [[TMP14]] +// CHECK4-NEXT: [[TMP16:%.*]] = sdiv exact i64 [[TMP15]], ptrtoint (i16* getelementptr (i16, i16* null, i32 1) to i64) +// CHECK4-NEXT: [[TMP17:%.*]] = getelementptr i16, i16* [[VLA]], i64 [[TMP16]] +// CHECK4-NEXT: store i16* [[TMP17]], i16** [[TMP]], align 8 +// CHECK4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK4-NEXT: [[TMP19:%.*]] = bitcast i16* [[VLA]] to i8* // CHECK4-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 8 -// CHECK4-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK4-NEXT: [[TMP22:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK4-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP21]], i32 1, i64 16, i8* [[TMP22]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK4-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK4-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK4-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP8]] to i8* +// CHECK4-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK4-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP23]], i32 1, i64 16, i8* [[TMP24]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK4-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK4-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK4-NEXT: ] // CHECK4: .omp.reduction.case1: -// CHECK4-NEXT: [[TMP24:%.*]] = getelementptr i16, i16* [[ARRAYIDX]], i64 [[TMP6]] -// CHECK4-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i16* [[ARRAYIDX]], [[TMP24]] +// CHECK4-NEXT: [[TMP26:%.*]] = getelementptr i16, i16* [[ARRAYIDX]], i64 [[TMP8]] +// CHECK4-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i16* [[ARRAYIDX]], [[TMP26]] // CHECK4-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK4: omp.arraycpy.body: // CHECK4-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i16* [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK4-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST2:%.*]] = phi i16* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT5:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK4-NEXT: [[TMP25:%.*]] = load i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST2]], align 2 -// CHECK4-NEXT: [[CONV:%.*]] = sext i16 [[TMP25]] to i32 -// CHECK4-NEXT: [[TMP26:%.*]] = load i16, i16* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 2 -// CHECK4-NEXT: [[CONV3:%.*]] = sext i16 [[TMP26]] to i32 +// CHECK4-NEXT: [[TMP27:%.*]] = load i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST2]], align 2 +// CHECK4-NEXT: [[CONV:%.*]] = sext i16 [[TMP27]] to i32 +// CHECK4-NEXT: [[TMP28:%.*]] = load i16, i16* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 2 +// CHECK4-NEXT: [[CONV3:%.*]] = sext i16 [[TMP28]] to i32 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV3]] // CHECK4-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD]] to i16 // CHECK4-NEXT: store i16 [[CONV4]], i16* [[OMP_ARRAYCPY_DESTELEMENTPAST2]], align 2 // CHECK4-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT5]] = getelementptr i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST2]], i32 1 // CHECK4-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i16, i16* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK4-NEXT: [[OMP_ARRAYCPY_DONE6:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP24]] +// CHECK4-NEXT: [[OMP_ARRAYCPY_DONE6:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP26]] // CHECK4-NEXT: br i1 [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] // CHECK4: omp.arraycpy.done7: -// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: .omp.reduction.case2: -// CHECK4-NEXT: [[TMP27:%.*]] = getelementptr i16, i16* [[ARRAYIDX]], i64 [[TMP6]] -// CHECK4-NEXT: [[OMP_ARRAYCPY_ISEMPTY8:%.*]] = icmp eq i16* [[ARRAYIDX]], [[TMP27]] +// CHECK4-NEXT: [[TMP29:%.*]] = getelementptr i16, i16* [[ARRAYIDX]], i64 [[TMP8]] +// CHECK4-NEXT: [[OMP_ARRAYCPY_ISEMPTY8:%.*]] = icmp eq i16* [[ARRAYIDX]], [[TMP29]] // CHECK4-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY8]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY9:%.*]] // CHECK4: omp.arraycpy.body9: // CHECK4-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST10:%.*]] = phi i16* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT19:%.*]], [[ATOMIC_EXIT:%.*]] ] // CHECK4-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST11:%.*]] = phi i16* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT18:%.*]], [[ATOMIC_EXIT]] ] -// CHECK4-NEXT: [[TMP28:%.*]] = load i16, i16* [[OMP_ARRAYCPY_SRCELEMENTPAST10]], align 2 -// CHECK4-NEXT: [[CONV12:%.*]] = sext i16 [[TMP28]] to i32 +// CHECK4-NEXT: [[TMP30:%.*]] = load i16, i16* [[OMP_ARRAYCPY_SRCELEMENTPAST10]], align 2 +// CHECK4-NEXT: [[CONV12:%.*]] = sext i16 [[TMP30]] to i32 // CHECK4-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST11]] monotonic, align 2 // CHECK4-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK4: atomic_cont: -// CHECK4-NEXT: [[TMP29:%.*]] = phi i16 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY9]] ], [ [[TMP34:%.*]], [[ATOMIC_CONT]] ] -// CHECK4-NEXT: store i16 [[TMP29]], i16* [[_TMP13]], align 2 -// CHECK4-NEXT: [[TMP30:%.*]] = load i16, i16* [[_TMP13]], align 2 -// CHECK4-NEXT: [[CONV14:%.*]] = sext i16 [[TMP30]] to i32 -// CHECK4-NEXT: [[TMP31:%.*]] = load i16, i16* [[OMP_ARRAYCPY_SRCELEMENTPAST10]], align 2 -// CHECK4-NEXT: [[CONV15:%.*]] = sext i16 [[TMP31]] to i32 +// CHECK4-NEXT: [[TMP31:%.*]] = phi i16 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY9]] ], [ [[TMP36:%.*]], [[ATOMIC_CONT]] ] +// CHECK4-NEXT: store i16 [[TMP31]], i16* [[_TMP13]], align 2 +// CHECK4-NEXT: [[TMP32:%.*]] = load i16, i16* [[_TMP13]], align 2 +// CHECK4-NEXT: [[CONV14:%.*]] = sext i16 [[TMP32]] to i32 +// CHECK4-NEXT: [[TMP33:%.*]] = load i16, i16* [[OMP_ARRAYCPY_SRCELEMENTPAST10]], align 2 +// CHECK4-NEXT: [[CONV15:%.*]] = sext i16 [[TMP33]] to i32 // CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV14]], [[CONV15]] // CHECK4-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i16 // CHECK4-NEXT: store i16 [[CONV17]], i16* [[ATOMIC_TEMP]], align 2 -// CHECK4-NEXT: [[TMP32:%.*]] = load i16, i16* [[ATOMIC_TEMP]], align 2 -// CHECK4-NEXT: [[TMP33:%.*]] = cmpxchg i16* [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i16 [[TMP29]], i16 [[TMP32]] monotonic monotonic, align 2 -// CHECK4-NEXT: [[TMP34]] = extractvalue { i16, i1 } [[TMP33]], 0 -// CHECK4-NEXT: [[TMP35:%.*]] = extractvalue { i16, i1 } [[TMP33]], 1 -// CHECK4-NEXT: br i1 [[TMP35]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK4-NEXT: [[TMP34:%.*]] = load i16, i16* [[ATOMIC_TEMP]], align 2 +// CHECK4-NEXT: [[TMP35:%.*]] = cmpxchg i16* [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i16 [[TMP31]], i16 [[TMP34]] monotonic monotonic, align 2 +// CHECK4-NEXT: [[TMP36]] = extractvalue { i16, i1 } [[TMP35]], 0 +// CHECK4-NEXT: [[TMP37:%.*]] = extractvalue { i16, i1 } [[TMP35]], 1 +// CHECK4-NEXT: br i1 [[TMP37]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK4: atomic_exit: // CHECK4-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT18]] = getelementptr i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i32 1 // CHECK4-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT19]] = getelementptr i16, i16* [[OMP_ARRAYCPY_SRCELEMENTPAST10]], i32 1 -// CHECK4-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT18]], [[TMP27]] +// CHECK4-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT18]], [[TMP29]] // CHECK4-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY9]] // CHECK4: omp.arraycpy.done21: // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: .omp.reduction.default: -// CHECK4-NEXT: [[TMP36:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK4-NEXT: call void @llvm.stackrestore(i8* [[TMP36]]) +// CHECK4-NEXT: [[TMP38:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK4-NEXT: call void @llvm.stackrestore(i8* [[TMP38]]) // CHECK4-NEXT: ret void // // @@ -2518,28 +2600,33 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>*, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK4-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* // CHECK4-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>** [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* @g) +// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store i32* @g, i32** [[TMP0]], align 8 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[G:%.*]]) #[[ATTR2]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[G1:%.*]] = alloca i32, align 128 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK4-NEXT: [[G:%.*]] = alloca i32, align 128 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [96 x i8], i32 }>, align 128 // CHECK4-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[G_ADDR]], align 8 -// CHECK4-NEXT: store i32 0, i32* [[G1]], align 128 -// CHECK4-NEXT: store i32 1, i32* [[G1]], align 128 +// CHECK4-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[G]], align 128 +// CHECK4-NEXT: store i32 1, i32* [[G]], align 128 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [96 x i8], i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [96 x i8], i32 }>* [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** [[BLOCK_ISA]], align 128 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [96 x i8], i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [96 x i8], i32 }>* [[BLOCK]], i32 0, i32 1 @@ -2551,36 +2638,36 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [96 x i8], i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [96 x i8], i32 }>* [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp.2 to %struct.__block_descriptor*), %struct.__block_descriptor** [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [96 x i8], i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [96 x i8], i32 }>* [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP1:%.*]] = load volatile i32, i32* [[G1]], align 128 -// CHECK4-NEXT: store volatile i32 [[TMP1]], i32* [[BLOCK_CAPTURED]], align 128 -// CHECK4-NEXT: [[TMP2:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [96 x i8], i32 }>* [[BLOCK]] to void ()* -// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP2]] to %struct.__block_literal_generic* -// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP4:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* -// CHECK4-NEXT: [[TMP5:%.*]] = load i8*, i8** [[TMP3]], align 8 -// CHECK4-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to void (i8*)* -// CHECK4-NEXT: call void [[TMP6]](i8* noundef [[TMP4]]) -// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK4-NEXT: [[TMP8:%.*]] = bitcast i32* [[G1]] to i8* -// CHECK4-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 8 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK4-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1, i64 8, i8* [[TMP11]], void (i8*, i8*)* @.omp.reduction.reduction_func.3, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK4-NEXT: switch i32 [[TMP12]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK4-NEXT: [[TMP3:%.*]] = load volatile i32, i32* [[G]], align 128 +// CHECK4-NEXT: store volatile i32 [[TMP3]], i32* [[BLOCK_CAPTURED]], align 128 +// CHECK4-NEXT: [[TMP4:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, [96 x i8], i32 }>* [[BLOCK]] to void ()* +// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP4]] to %struct.__block_literal_generic* +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP6:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* +// CHECK4-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to void (i8*)* +// CHECK4-NEXT: call void [[TMP8]](i8* noundef [[TMP6]]) +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK4-NEXT: [[TMP10:%.*]] = bitcast i32* [[G]] to i8* +// CHECK4-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK4-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 1, i64 8, i8* [[TMP13]], void (i8*, i8*)* @.omp.reduction.reduction_func.3, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: switch i32 [[TMP14]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK4-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK4-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK4-NEXT: ] // CHECK4: .omp.reduction.case1: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP0]], align 128 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[G1]], align 128 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 128 -// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 128 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[G]], align 128 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[TMP2]], align 128 +// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: .omp.reduction.case2: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[G1]], align 128 -// CHECK4-NEXT: [[TMP16:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP15]] monotonic, align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[G]], align 128 +// CHECK4-NEXT: [[TMP18:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP17]] monotonic, align 4 // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: .omp.reduction.default: // CHECK4-NEXT: ret void @@ -2631,6 +2718,7 @@ // CHECK4-NEXT: [[A2:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[C5:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK4-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK4-NEXT: store i32* [[D]], i32** [[D_ADDR]], align 8 // CHECK4-NEXT: [[THIS1:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 @@ -2649,14 +2737,22 @@ // CHECK4-NEXT: [[C6:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 2 // CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[C6]], align 8 // CHECK4-NEXT: store i32* [[TMP1]], i32** [[C5]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A2]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32*, i32** [[C5]], align 8 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.SS* [[THIS1]], i32* [[TMP2]], i32* [[B4]], i32* [[TMP3]]) -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[B4]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store %struct.SS* [[THIS1]], %struct.SS** [[TMP2]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A2]], align 8 +// CHECK4-NEXT: store i32* [[TMP4]], i32** [[TMP3]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK4-NEXT: store i32* [[B4]], i32** [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32*, i32** [[C5]], align 8 +// CHECK4-NEXT: store i32* [[TMP7]], i32** [[TMP6]], align 8 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[B4]], align 4 // CHECK4-NEXT: [[B7:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 +// CHECK4-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i8 // CHECK4-NEXT: [[BF_LOAD8:%.*]] = load i8, i8* [[B7]], align 4 -// CHECK4-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP5]], 15 +// CHECK4-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP9]], 15 // CHECK4-NEXT: [[BF_CLEAR9:%.*]] = and i8 [[BF_LOAD8]], -16 // CHECK4-NEXT: [[BF_SET10:%.*]] = or i8 [[BF_CLEAR9]], [[BF_VALUE]] // CHECK4-NEXT: store i8 [[BF_SET10]], i8* [[B7]], align 4 @@ -2664,42 +2760,41 @@ // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[A2:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[_TMP3:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[B4:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[C5:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[_TMP6:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, align 8 // CHECK4-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x i8*], align 8 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK4-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK4-NEXT: store i32* [[B]], i32** [[B_ADDR]], align 8 -// CHECK4-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[B_ADDR]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32*, i32** [[C_ADDR]], align 8 -// CHECK4-NEXT: store i32* [[TMP1]], i32** [[TMP]], align 8 -// CHECK4-NEXT: store i32* [[TMP3]], i32** [[_TMP1]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK4-NEXT: store i32 0, i32* [[A2]], align 4 -// CHECK4-NEXT: store i32* [[A2]], i32** [[_TMP3]], align 8 -// CHECK4-NEXT: store i32 0, i32* [[B4]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[_TMP1]], align 8 -// CHECK4-NEXT: store i32 0, i32* [[C5]], align 4 -// CHECK4-NEXT: store i32* [[C5]], i32** [[_TMP6]], align 8 +// CHECK4-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 +// CHECK4-NEXT: store i32* [[TMP4]], i32** [[TMP]], align 8 +// CHECK4-NEXT: store i32* [[TMP8]], i32** [[_TMP1]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK4-NEXT: store i32* [[A]], i32** [[_TMP2]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[B]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP1]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[C]], align 4 +// CHECK4-NEXT: store i32* [[C]], i32** [[_TMP3]], align 8 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** [[BLOCK_ISA]], align 8 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 1 @@ -2711,62 +2806,62 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp.7 to %struct.__block_descriptor*), %struct.__block_descriptor** [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED_THIS_ADDR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[BLOCK_CAPTURED_THIS_ADDR]], align 8 +// CHECK4-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[BLOCK_CAPTURED_THIS_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK4-NEXT: store i32* [[TMP6]], i32** [[BLOCK_CAPTURED]], align 8 -// CHECK4-NEXT: [[BLOCK_CAPTURED7:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 8 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[B4]], align 4 -// CHECK4-NEXT: store i32 [[TMP7]], i32* [[BLOCK_CAPTURED7]], align 8 -// CHECK4-NEXT: [[BLOCK_CAPTURED8:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32*, i32** [[_TMP6]], align 8 -// CHECK4-NEXT: store i32* [[TMP8]], i32** [[BLOCK_CAPTURED8]], align 8 -// CHECK4-NEXT: [[TMP9:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]] to void ()* -// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP9]] to %struct.__block_literal_generic* -// CHECK4-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP11:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* -// CHECK4-NEXT: [[TMP12:%.*]] = load i8*, i8** [[TMP10]], align 8 -// CHECK4-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to void (i8*)* -// CHECK4-NEXT: call void [[TMP13]](i8* noundef [[TMP11]]) -// CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK4-NEXT: [[TMP15:%.*]] = bitcast i32* [[A2]] to i8* -// CHECK4-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 8 -// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK4-NEXT: [[TMP17:%.*]] = bitcast i32* [[B4]] to i8* -// CHECK4-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8 -// CHECK4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK4-NEXT: [[TMP19:%.*]] = bitcast i32* [[C5]] to i8* -// CHECK4-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 8 -// CHECK4-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK4-NEXT: [[TMP22:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK4-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]], i32 3, i64 24, i8* [[TMP22]], void (i8*, i8*)* @.omp.reduction.reduction_func.8, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK4-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK4-NEXT: store i32* [[TMP11]], i32** [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[BLOCK_CAPTURED4:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[B]], align 4 +// CHECK4-NEXT: store i32 [[TMP12]], i32* [[BLOCK_CAPTURED4]], align 8 +// CHECK4-NEXT: [[BLOCK_CAPTURED5:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 7 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK4-NEXT: store i32* [[TMP13]], i32** [[BLOCK_CAPTURED5]], align 8 +// CHECK4-NEXT: [[TMP14:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]] to void ()* +// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP14]] to %struct.__block_literal_generic* +// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP16:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* +// CHECK4-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP15]], align 8 +// CHECK4-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to void (i8*)* +// CHECK4-NEXT: call void [[TMP18]](i8* noundef [[TMP16]]) +// CHECK4-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK4-NEXT: [[TMP20:%.*]] = bitcast i32* [[A]] to i8* +// CHECK4-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 +// CHECK4-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK4-NEXT: [[TMP22:%.*]] = bitcast i32* [[B]] to i8* +// CHECK4-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 8 +// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK4-NEXT: [[TMP24:%.*]] = bitcast i32* [[C]] to i8* +// CHECK4-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 +// CHECK4-NEXT: [[TMP27:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK4-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]], i32 3, i64 24, i8* [[TMP27]], void (i8*, i8*)* @.omp.reduction.reduction_func.8, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: switch i32 [[TMP28]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK4-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK4-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK4-NEXT: ] // CHECK4: .omp.reduction.case1: -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[A2]], align 4 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[TMP4]], align 4 -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[B4]], align 4 -// CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK4-NEXT: store i32 [[ADD9]], i32* [[TMP2]], align 4 -// CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[C5]], align 4 -// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK4-NEXT: store i32 [[ADD10]], i32* [[TMP5]], align 4 -// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[A]], align 4 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[TMP9]], align 4 +// CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK4-NEXT: [[TMP32:%.*]] = load i32, i32* [[B]], align 4 +// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK4-NEXT: store i32 [[ADD6]], i32* [[TMP6]], align 4 +// CHECK4-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK4-NEXT: [[TMP34:%.*]] = load i32, i32* [[C]], align 4 +// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK4-NEXT: store i32 [[ADD7]], i32* [[TMP10]], align 4 +// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: .omp.reduction.case2: -// CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[A2]], align 4 -// CHECK4-NEXT: [[TMP31:%.*]] = atomicrmw add i32* [[TMP4]], i32 [[TMP30]] monotonic, align 4 -// CHECK4-NEXT: [[TMP32:%.*]] = load i32, i32* [[B4]], align 4 -// CHECK4-NEXT: [[TMP33:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP32]] monotonic, align 4 -// CHECK4-NEXT: [[TMP34:%.*]] = load i32, i32* [[C5]], align 4 -// CHECK4-NEXT: [[TMP35:%.*]] = atomicrmw add i32* [[TMP5]], i32 [[TMP34]] monotonic, align 4 +// CHECK4-NEXT: [[TMP35:%.*]] = load i32, i32* [[A]], align 4 +// CHECK4-NEXT: [[TMP36:%.*]] = atomicrmw add i32* [[TMP9]], i32 [[TMP35]] monotonic, align 4 +// CHECK4-NEXT: [[TMP37:%.*]] = load i32, i32* [[B]], align 4 +// CHECK4-NEXT: [[TMP38:%.*]] = atomicrmw add i32* [[TMP6]], i32 [[TMP37]] monotonic, align 4 +// CHECK4-NEXT: [[TMP39:%.*]] = load i32, i32* [[C]], align 4 +// CHECK4-NEXT: [[TMP40:%.*]] = atomicrmw add i32* [[TMP10]], i32 [[TMP39]] monotonic, align 4 // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: .omp.reduction.default: // CHECK4-NEXT: ret void @@ -2777,6 +2872,7 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>*, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK4-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* // CHECK4-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>** [[BLOCK_ADDR]], align 8 @@ -2796,101 +2892,108 @@ // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP4]], 1 // CHECK4-NEXT: store i32 [[DIV]], i32* [[TMP3]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[BLOCK_CAPTURE_ADDR3]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32*, i32** [[BLOCK_CAPTURE_ADDR3]], align 8 +// CHECK4-NEXT: store i32* [[TMP7]], i32** [[TMP6]], align 8 +// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR4:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 8 +// CHECK4-NEXT: store i32* [[BLOCK_CAPTURE_ADDR4]], i32** [[TMP8]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR5:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32*, i32** [[BLOCK_CAPTURE_ADDR5]], align 8 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.SS* [[THIS]], i32* [[TMP5]], i32* [[BLOCK_CAPTURE_ADDR4]], i32* [[TMP6]]) +// CHECK4-NEXT: [[TMP10:%.*]] = load i32*, i32** [[BLOCK_CAPTURE_ADDR5]], align 8 +// CHECK4-NEXT: store i32* [[TMP10]], i32** [[TMP9]], align 8 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[A2:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[_TMP3:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[B4:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[C5:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[_TMP6:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x i8*], align 8 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK4-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK4-NEXT: store i32* [[B]], i32** [[B_ADDR]], align 8 -// CHECK4-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[B_ADDR]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32*, i32** [[C_ADDR]], align 8 -// CHECK4-NEXT: store i32* [[TMP1]], i32** [[TMP]], align 8 -// CHECK4-NEXT: store i32* [[TMP3]], i32** [[_TMP1]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK4-NEXT: store i32 0, i32* [[A2]], align 4 -// CHECK4-NEXT: store i32* [[A2]], i32** [[_TMP3]], align 8 -// CHECK4-NEXT: store i32 0, i32* [[B4]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[_TMP1]], align 8 -// CHECK4-NEXT: store i32 0, i32* [[C5]], align 4 -// CHECK4-NEXT: store i32* [[C5]], i32** [[_TMP6]], align 8 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK4-NEXT: store i32 [[INC]], i32* [[TMP6]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[B4]], align 4 -// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP8]], -1 -// CHECK4-NEXT: store i32 [[DEC]], i32* [[B4]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32*, i32** [[_TMP6]], align 8 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[DIV]], i32* [[TMP9]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK4-NEXT: [[TMP12:%.*]] = bitcast i32* [[A2]] to i8* -// CHECK4-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8 -// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK4-NEXT: [[TMP14:%.*]] = bitcast i32* [[B4]] to i8* -// CHECK4-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 -// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK4-NEXT: [[TMP16:%.*]] = bitcast i32* [[C5]] to i8* -// CHECK4-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK4-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]], i32 3, i64 24, i8* [[TMP19]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK4-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK4-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 +// CHECK4-NEXT: store i32* [[TMP4]], i32** [[TMP]], align 8 +// CHECK4-NEXT: store i32* [[TMP8]], i32** [[_TMP1]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK4-NEXT: store i32* [[A]], i32** [[_TMP2]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[B]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP1]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[C]], align 4 +// CHECK4-NEXT: store i32* [[C]], i32** [[_TMP3]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK4-NEXT: store i32 [[INC]], i32* [[TMP11]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[B]], align 4 +// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP13]], -1 +// CHECK4-NEXT: store i32 [[DEC]], i32* [[B]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP15]], 1 +// CHECK4-NEXT: store i32 [[DIV]], i32* [[TMP14]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK4-NEXT: [[TMP17:%.*]] = bitcast i32* [[A]] to i8* +// CHECK4-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8 +// CHECK4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK4-NEXT: [[TMP19:%.*]] = bitcast i32* [[B]] to i8* +// CHECK4-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 8 +// CHECK4-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK4-NEXT: [[TMP21:%.*]] = bitcast i32* [[C]] to i8* +// CHECK4-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK4-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]], i32 3, i64 24, i8* [[TMP24]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK4-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK4-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK4-NEXT: ] // CHECK4: .omp.reduction.case1: -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[A2]], align 4 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[TMP4]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[B4]], align 4 -// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK4-NEXT: store i32 [[ADD7]], i32* [[TMP2]], align 4 -// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[C5]], align 4 -// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK4-NEXT: store i32 [[ADD8]], i32* [[TMP5]], align 4 -// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[A]], align 4 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[TMP9]], align 4 +// CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[B]], align 4 +// CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] +// CHECK4-NEXT: store i32 [[ADD4]], i32* [[TMP6]], align 4 +// CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[C]], align 4 +// CHECK4-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] +// CHECK4-NEXT: store i32 [[ADD5]], i32* [[TMP10]], align 4 +// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: .omp.reduction.case2: -// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[A2]], align 4 -// CHECK4-NEXT: [[TMP28:%.*]] = atomicrmw add i32* [[TMP4]], i32 [[TMP27]] monotonic, align 4 -// CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[B4]], align 4 -// CHECK4-NEXT: [[TMP30:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP29]] monotonic, align 4 -// CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[C5]], align 4 -// CHECK4-NEXT: [[TMP32:%.*]] = atomicrmw add i32* [[TMP5]], i32 [[TMP31]] monotonic, align 4 +// CHECK4-NEXT: [[TMP32:%.*]] = load i32, i32* [[A]], align 4 +// CHECK4-NEXT: [[TMP33:%.*]] = atomicrmw add i32* [[TMP9]], i32 [[TMP32]] monotonic, align 4 +// CHECK4-NEXT: [[TMP34:%.*]] = load i32, i32* [[B]], align 4 +// CHECK4-NEXT: [[TMP35:%.*]] = atomicrmw add i32* [[TMP6]], i32 [[TMP34]] monotonic, align 4 +// CHECK4-NEXT: [[TMP36:%.*]] = load i32, i32* [[C]], align 4 +// CHECK4-NEXT: [[TMP37:%.*]] = atomicrmw add i32* [[TMP10]], i32 [[TMP36]] monotonic, align 4 // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: .omp.reduction.default: // CHECK4-NEXT: ret void diff --git a/clang/test/OpenMP/parallel_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_reduction_task_codegen.cpp --- a/clang/test/OpenMP/parallel_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/parallel_reduction_task_codegen.cpp @@ -40,237 +40,243 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8**)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i8** [[TMP0]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[ARGC_ADDR]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: store i8** [[TMP2]], i8*** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i8** noundef [[ARGV:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x i8*], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP24:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP23:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP1]], i64 0 -// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8*, i8** [[TMP5]], i64 9 -// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint i8* [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint i8* [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = add nuw i64 [[TMP10]], 1 -// CHECK1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = call i8* @llvm.stacksave() -// CHECK1-NEXT: store i8* [[TMP13]], i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP11]], align 16 -// CHECK1-NEXT: store i64 [[TMP11]], i64* [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[VLA]], [[TMP14]] +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: store i32 0, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP4]], i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8*, i8** [[TMP8]], i64 9 +// CHECK1-NEXT: [[TMP9:%.*]] = load i8*, i8** [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP9]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint i8* [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint i8* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP14:%.*]] = add nuw i64 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP16:%.*]] = call i8* @llvm.stacksave() +// CHECK1-NEXT: store i8* [[TMP16]], i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP14]], align 16 +// CHECK1-NEXT: store i64 [[TMP14]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[VLA]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP15:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint i8* [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint i8* [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP20]] -// CHECK1-NEXT: store i8** [[_TMP5]], i8*** [[TMP]], align 8 -// CHECK1-NEXT: store i8* [[TMP21]], i8** [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint i8* [[TMP19]] to i64 +// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint i8* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP22:%.*]] = sub i64 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP23:%.*]] = sdiv exact i64 [[TMP22]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP23]] +// CHECK1-NEXT: store i8** [[_TMP4]], i8*** [[TMP]], align 8 +// CHECK1-NEXT: store i8* [[TMP24]], i8** [[_TMP4]], align 8 // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i32* [[ARGC1]] to i8* -// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP25:%.*]] = bitcast i32* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, i64* [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init. to i8*), i8** [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store i8* null, i8** [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb. to i8*), i8** [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: [[TMP31:%.*]] = bitcast i32* [[TMP30]] to i8* -// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP31]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP33:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8*, i8** [[TMP33]], i64 0 -// CHECK1-NEXT: [[TMP34:%.*]] = load i8*, i8** [[ARRAYIDX7]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, i8* [[TMP34]], i64 0 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = sext i32 [[TMP35]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP36]] -// CHECK1-NEXT: [[TMP37:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i8*, i8** [[TMP37]], i64 9 -// CHECK1-NEXT: [[TMP38:%.*]] = load i8*, i8** [[ARRAYIDX10]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, i8* [[TMP38]], i64 [[LB_ADD_LEN9]] -// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 -// CHECK1-NEXT: store i8* [[ARRAYIDX8]], i8** [[TMP39]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint i8* [[ARRAYIDX11]] to i64 -// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint i8* [[ARRAYIDX8]] to i64 -// CHECK1-NEXT: [[TMP42:%.*]] = sub i64 [[TMP40]], [[TMP41]] -// CHECK1-NEXT: [[TMP43:%.*]] = sdiv exact i64 [[TMP42]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP44:%.*]] = add nuw i64 [[TMP43]], 1 -// CHECK1-NEXT: [[TMP45:%.*]] = mul nuw i64 [[TMP44]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP45]], i64* [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init..1 to i8*), i8** [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 -// CHECK1-NEXT: store i8* null, i8** [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb..2 to i8*), i8** [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 -// CHECK1-NEXT: store i32 1, i32* [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[TMP51]], align 4 -// CHECK1-NEXT: [[TMP53:%.*]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]] to i8* -// CHECK1-NEXT: [[TMP54:%.*]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @[[GLOB1]], i32 [[TMP52]], i32 0, i32 2, i8* [[TMP53]]) -// CHECK1-NEXT: store i8* [[TMP54]], i8** [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store i8** [[DOTTASK_RED_]], i8*** [[TMP55]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[ARGC1]], i32** [[TMP56]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP58:%.*]] = load i8**, i8*** [[TMP]], align 8 -// CHECK1-NEXT: store i8** [[TMP58]], i8*** [[TMP57]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = load i32, i32* [[TMP59]], align 4 -// CHECK1-NEXT: [[TMP61:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP60]], i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP62:%.*]] = bitcast i8* [[TMP61]] to %struct.kmp_task_t_with_privates* -// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP62]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP63]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP65:%.*]] = load i8*, i8** [[TMP64]], align 8 -// CHECK1-NEXT: [[TMP66:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP65]], i8* align 8 [[TMP66]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP62]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP67]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP69:%.*]] = load i8*, i8** [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: store i8* [[TMP69]], i8** [[TMP68]], align 8 -// CHECK1-NEXT: [[TMP70:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP71:%.*]] = load i32, i32* [[TMP70]], align 4 -// CHECK1-NEXT: [[TMP72:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP71]], i8* [[TMP61]]) +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i32* [[ARGC]] to i8* +// CHECK1-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP28:%.*]] = bitcast i32* [[TMP2]] to i8* +// CHECK1-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, i64* [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init. to i8*), i8** [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store i8* null, i8** [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb. to i8*), i8** [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to i8* +// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP34]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP36:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP36]], i64 0 +// CHECK1-NEXT: [[TMP37:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP37]], i64 0 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = sext i32 [[TMP38]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN8:%.*]] = add nsw i64 -1, [[TMP39]] +// CHECK1-NEXT: [[TMP40:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8*, i8** [[TMP40]], i64 9 +// CHECK1-NEXT: [[TMP41:%.*]] = load i8*, i8** [[ARRAYIDX9]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i8, i8* [[TMP41]], i64 [[LB_ADD_LEN8]] +// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 1 +// CHECK1-NEXT: store i8* [[ARRAYIDX7]], i8** [[TMP42]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = ptrtoint i8* [[ARRAYIDX10]] to i64 +// CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint i8* [[ARRAYIDX7]] to i64 +// CHECK1-NEXT: [[TMP45:%.*]] = sub i64 [[TMP43]], [[TMP44]] +// CHECK1-NEXT: [[TMP46:%.*]] = sdiv exact i64 [[TMP45]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP47:%.*]] = add nuw i64 [[TMP46]], 1 +// CHECK1-NEXT: [[TMP48:%.*]] = mul nuw i64 [[TMP47]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP48]], i64* [[TMP49]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 3 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init..1 to i8*), i8** [[TMP50]], align 8 +// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 4 +// CHECK1-NEXT: store i8* null, i8** [[TMP51]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 5 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb..2 to i8*), i8** [[TMP52]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 6 +// CHECK1-NEXT: store i32 1, i32* [[TMP53]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, i32* [[TMP54]], align 4 +// CHECK1-NEXT: [[TMP56:%.*]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]] to i8* +// CHECK1-NEXT: [[TMP57:%.*]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @[[GLOB1]], i32 [[TMP55]], i32 0, i32 2, i8* [[TMP56]]) +// CHECK1-NEXT: store i8* [[TMP57]], i8** [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store i8** [[DOTTASK_RED_]], i8*** [[TMP58]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[ARGC]], i32** [[TMP59]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP61:%.*]] = load i8**, i8*** [[TMP]], align 8 +// CHECK1-NEXT: store i8** [[TMP61]], i8*** [[TMP60]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = load i32, i32* [[TMP62]], align 4 +// CHECK1-NEXT: [[TMP64:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP63]], i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP65:%.*]] = bitcast i8* [[TMP64]] to %struct.kmp_task_t_with_privates* +// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP65]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP66]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP68:%.*]] = load i8*, i8** [[TMP67]], align 8 +// CHECK1-NEXT: [[TMP69:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP68]], i8* align 8 [[TMP69]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP65]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP70]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP72:%.*]] = load i8*, i8** [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: store i8* [[TMP72]], i8** [[TMP71]], align 8 // CHECK1-NEXT: [[TMP73:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP74:%.*]] = load i32, i32* [[TMP73]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP74]], i32 0) -// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP76:%.*]] = bitcast i32* [[ARGC1]] to i8* -// CHECK1-NEXT: store i8* [[TMP76]], i8** [[TMP75]], align 8 -// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP77]], align 8 -// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP79:%.*]] = inttoptr i64 [[TMP11]] to i8* +// CHECK1-NEXT: [[TMP75:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP74]], i8* [[TMP64]]) +// CHECK1-NEXT: [[TMP76:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP77:%.*]] = load i32, i32* [[TMP76]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP77]], i32 0) +// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP79:%.*]] = bitcast i32* [[ARGC]] to i8* // CHECK1-NEXT: store i8* [[TMP79]], i8** [[TMP78]], align 8 -// CHECK1-NEXT: [[TMP80:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP81:%.*]] = load i32, i32* [[TMP80]], align 4 -// CHECK1-NEXT: [[TMP82:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP83:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP81]], i32 2, i64 24, i8* [[TMP82]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP83]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP80:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP80]], align 8 +// CHECK1-NEXT: [[TMP81:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP82:%.*]] = inttoptr i64 [[TMP14]] to i8* +// CHECK1-NEXT: store i8* [[TMP82]], i8** [[TMP81]], align 8 +// CHECK1-NEXT: [[TMP83:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP84:%.*]] = load i32, i32* [[TMP83]], align 4 +// CHECK1-NEXT: [[TMP85:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP86:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP84]], i32 2, i64 24, i8* [[TMP85]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP86]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP84:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP85:%.*]] = load i32, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP84]], [[TMP85]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP86:%.*]] = getelementptr i8, i8* [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[ARRAYIDX2]], [[TMP86]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP87:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP88:%.*]] = load i32, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP87]], [[TMP88]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP89:%.*]] = getelementptr i8, i8* [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[ARRAYIDX1]], [[TMP89]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST12:%.*]] = phi i8* [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP87:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP87]] to i32 -// CHECK1-NEXT: [[TMP88:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP88]] to i32 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV]], [[CONV13]] -// CHECK1-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK1-NEXT: store i8 [[CONV15]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST12]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST11:%.*]] = phi i8* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP90:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP90]] to i32 +// CHECK1-NEXT: [[TMP91:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV12:%.*]] = sext i8 [[TMP91]] to i32 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV]], [[CONV12]] +// CHECK1-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i8 +// CHECK1-NEXT: store i8 [[CONV14]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP86]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done18: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP81]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP89]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done17: +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP84]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP89:%.*]] = load i32, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP90:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP89]] monotonic, align 4 -// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr i8, i8* [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq i8* [[ARRAYIDX2]], [[TMP91]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY19]], label [[OMP_ARRAYCPY_DONE32:%.*]], label [[OMP_ARRAYCPY_BODY20:%.*]] -// CHECK1: omp.arraycpy.body20: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST21:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT30:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST22:%.*]] = phi i8* [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT29:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP92:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1 -// CHECK1-NEXT: [[CONV23:%.*]] = sext i8 [[TMP92]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST22]] monotonic, align 1 +// CHECK1-NEXT: [[TMP92:%.*]] = load i32, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP93:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP92]] monotonic, align 4 +// CHECK1-NEXT: [[TMP94:%.*]] = getelementptr i8, i8* [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY18:%.*]] = icmp eq i8* [[ARRAYIDX1]], [[TMP94]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY18]], label [[OMP_ARRAYCPY_DONE31:%.*]], label [[OMP_ARRAYCPY_BODY19:%.*]] +// CHECK1: omp.arraycpy.body19: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST20:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT29:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST21:%.*]] = phi i8* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT28:%.*]], [[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[TMP95:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 1 +// CHECK1-NEXT: [[CONV22:%.*]] = sext i8 [[TMP95]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST21]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP93:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY20]] ], [ [[TMP98:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP93]], i8* [[_TMP24]], align 1 -// CHECK1-NEXT: [[TMP94:%.*]] = load i8, i8* [[_TMP24]], align 1 -// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP94]] to i32 -// CHECK1-NEXT: [[TMP95:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1 -// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP95]] to i32 -// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[CONV25]], [[CONV26]] -// CHECK1-NEXT: [[CONV28:%.*]] = trunc i32 [[ADD27]] to i8 -// CHECK1-NEXT: store i8 [[CONV28]], i8* [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP96:%.*]] = load i8, i8* [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP97:%.*]] = cmpxchg i8* [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i8 [[TMP93]], i8 [[TMP96]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP98]] = extractvalue { i8, i1 } [[TMP97]], 0 -// CHECK1-NEXT: [[TMP99:%.*]] = extractvalue { i8, i1 } [[TMP97]], 1 -// CHECK1-NEXT: br i1 [[TMP99]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP96:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY19]] ], [ [[TMP101:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP96]], i8* [[_TMP23]], align 1 +// CHECK1-NEXT: [[TMP97:%.*]] = load i8, i8* [[_TMP23]], align 1 +// CHECK1-NEXT: [[CONV24:%.*]] = sext i8 [[TMP97]] to i32 +// CHECK1-NEXT: [[TMP98:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 1 +// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP98]] to i32 +// CHECK1-NEXT: [[ADD26:%.*]] = add nsw i32 [[CONV24]], [[CONV25]] +// CHECK1-NEXT: [[CONV27:%.*]] = trunc i32 [[ADD26]] to i8 +// CHECK1-NEXT: store i8 [[CONV27]], i8* [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP99:%.*]] = load i8, i8* [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP100:%.*]] = cmpxchg i8* [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i8 [[TMP96]], i8 [[TMP99]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP101]] = extractvalue { i8, i1 } [[TMP100]], 0 +// CHECK1-NEXT: [[TMP102:%.*]] = extractvalue { i8, i1 } [[TMP100]], 1 +// CHECK1-NEXT: br i1 [[TMP102]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT29]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT30]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE31:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT29]], [[TMP91]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_DONE32]], label [[OMP_ARRAYCPY_BODY20]] -// CHECK1: omp.arraycpy.done32: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT28]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT29]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST20]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE30:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT28]], [[TMP94]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE30]], label [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_BODY19]] +// CHECK1: omp.arraycpy.done31: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP100:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP100]]) +// CHECK1-NEXT: [[TMP103:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP103]]) // CHECK1-NEXT: ret void // // @@ -380,7 +386,7 @@ // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[TMP_I:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[TMP4_I:%.*]] = alloca i8*, align 8 @@ -394,7 +400,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -407,29 +413,29 @@ // CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* // CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5:[0-9]+]] // CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* // CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP24:%.*]] = load i8**, i8*** [[TMP23]], align 8 // CHECK1-NEXT: [[TMP25:%.*]] = load i8*, i8** [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP26]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 // CHECK1-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP29]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP31:%.*]] = load i8**, i8*** [[TMP30]], align 8 // CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds i8*, i8** [[TMP31]], i64 9 // CHECK1-NEXT: [[TMP32:%.*]] = load i8*, i8** [[ARRAYIDX2_I]], align 8 @@ -440,10 +446,10 @@ // CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 // CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8 +// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8, !noalias !12 // CHECK1-NEXT: [[TMP39:%.*]] = load i8*, i8** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP40:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP39]], i8* [[TMP25]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP42:%.*]] = load i8**, i8*** [[TMP41]], align 8 // CHECK1-NEXT: [[TMP43:%.*]] = load i8*, i8** [[TMP42]], align 8 // CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint i8* [[TMP43]] to i64 diff --git a/clang/test/OpenMP/parallel_sections_codegen.cpp b/clang/test/OpenMP/parallel_sections_codegen.cpp --- a/clang/test/OpenMP/parallel_sections_codegen.cpp +++ b/clang/test/OpenMP/parallel_sections_codegen.cpp @@ -57,17 +57,19 @@ // CHECK1-SAME: () #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK1-NEXT: ret i32 [[CALL]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -75,28 +77,30 @@ // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1 -// CHECK1-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 1 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK1-NEXT: ] @@ -113,18 +117,18 @@ // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK1-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP10:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP11:%.*]] = extractvalue { i8*, i32 } [[TMP10]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP11]]) #[[ATTR7:[0-9]+]] +// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR7:[0-9]+]] // CHECK1-NEXT: unreachable // // @@ -138,15 +142,17 @@ // CHECK1-LABEL: define {{[^@]+}}@_Z5tmainIiET_v // CHECK1-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -154,28 +160,30 @@ // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 0 -// CHECK1-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 0 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 0 +// CHECK1-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 0 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: @@ -186,17 +194,17 @@ // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK1-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP10:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP11:%.*]] = extractvalue { i8*, i32 } [[TMP10]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP11]]) #[[ATTR7]] +// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR7]] // CHECK1-NEXT: unreachable // diff --git a/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp --- a/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp @@ -40,279 +40,285 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8**)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i8** [[TMP0]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[ARGC_ADDR]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: store i8** [[TMP2]], i8*** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i8** noundef [[ARGV:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x i8*], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP24:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP23:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP1]], i64 0 -// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8*, i8** [[TMP5]], i64 9 -// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint i8* [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint i8* [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = add nuw i64 [[TMP10]], 1 -// CHECK1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = call i8* @llvm.stacksave() -// CHECK1-NEXT: store i8* [[TMP13]], i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP11]], align 16 -// CHECK1-NEXT: store i64 [[TMP11]], i64* [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[VLA]], [[TMP14]] +// CHECK1-NEXT: store i32 0, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP4]], i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8*, i8** [[TMP8]], i64 9 +// CHECK1-NEXT: [[TMP9:%.*]] = load i8*, i8** [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP9]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint i8* [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint i8* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP14:%.*]] = add nuw i64 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP16:%.*]] = call i8* @llvm.stacksave() +// CHECK1-NEXT: store i8* [[TMP16]], i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP14]], align 16 +// CHECK1-NEXT: store i64 [[TMP14]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[VLA]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP15:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint i8* [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint i8* [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP20]] -// CHECK1-NEXT: store i8** [[_TMP5]], i8*** [[TMP]], align 8 -// CHECK1-NEXT: store i8* [[TMP21]], i8** [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint i8* [[TMP19]] to i64 +// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint i8* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP22:%.*]] = sub i64 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP23:%.*]] = sdiv exact i64 [[TMP22]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP23]] +// CHECK1-NEXT: store i8** [[_TMP4]], i8*** [[TMP]], align 8 +// CHECK1-NEXT: store i8* [[TMP24]], i8** [[_TMP4]], align 8 // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i32* [[ARGC1]] to i8* -// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP25:%.*]] = bitcast i32* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, i64* [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init. to i8*), i8** [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store i8* null, i8** [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb. to i8*), i8** [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: [[TMP31:%.*]] = bitcast i32* [[TMP30]] to i8* -// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP31]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP33:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8*, i8** [[TMP33]], i64 0 -// CHECK1-NEXT: [[TMP34:%.*]] = load i8*, i8** [[ARRAYIDX7]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, i8* [[TMP34]], i64 0 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = sext i32 [[TMP35]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP36]] -// CHECK1-NEXT: [[TMP37:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i8*, i8** [[TMP37]], i64 9 -// CHECK1-NEXT: [[TMP38:%.*]] = load i8*, i8** [[ARRAYIDX10]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, i8* [[TMP38]], i64 [[LB_ADD_LEN9]] -// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 -// CHECK1-NEXT: store i8* [[ARRAYIDX8]], i8** [[TMP39]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint i8* [[ARRAYIDX11]] to i64 -// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint i8* [[ARRAYIDX8]] to i64 -// CHECK1-NEXT: [[TMP42:%.*]] = sub i64 [[TMP40]], [[TMP41]] -// CHECK1-NEXT: [[TMP43:%.*]] = sdiv exact i64 [[TMP42]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP44:%.*]] = add nuw i64 [[TMP43]], 1 -// CHECK1-NEXT: [[TMP45:%.*]] = mul nuw i64 [[TMP44]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP45]], i64* [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init..1 to i8*), i8** [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 -// CHECK1-NEXT: store i8* null, i8** [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb..2 to i8*), i8** [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 -// CHECK1-NEXT: store i32 1, i32* [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[TMP51]], align 4 -// CHECK1-NEXT: [[TMP53:%.*]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]] to i8* -// CHECK1-NEXT: [[TMP54:%.*]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @[[GLOB1]], i32 [[TMP52]], i32 1, i32 2, i8* [[TMP53]]) -// CHECK1-NEXT: store i8* [[TMP54]], i8** [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: [[TMP55:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP56]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP57:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP58:%.*]] = icmp slt i32 [[TMP57]], 0 -// CHECK1-NEXT: [[TMP59:%.*]] = select i1 [[TMP58]], i32 [[TMP57]], i32 0 -// CHECK1-NEXT: store i32 [[TMP59]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP60:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP60]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i32* [[ARGC]] to i8* +// CHECK1-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP28:%.*]] = bitcast i32* [[TMP2]] to i8* +// CHECK1-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, i64* [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init. to i8*), i8** [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store i8* null, i8** [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb. to i8*), i8** [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to i8* +// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP34]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP36:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP36]], i64 0 +// CHECK1-NEXT: [[TMP37:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP37]], i64 0 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = sext i32 [[TMP38]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN8:%.*]] = add nsw i64 -1, [[TMP39]] +// CHECK1-NEXT: [[TMP40:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8*, i8** [[TMP40]], i64 9 +// CHECK1-NEXT: [[TMP41:%.*]] = load i8*, i8** [[ARRAYIDX9]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i8, i8* [[TMP41]], i64 [[LB_ADD_LEN8]] +// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 1 +// CHECK1-NEXT: store i8* [[ARRAYIDX7]], i8** [[TMP42]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = ptrtoint i8* [[ARRAYIDX10]] to i64 +// CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint i8* [[ARRAYIDX7]] to i64 +// CHECK1-NEXT: [[TMP45:%.*]] = sub i64 [[TMP43]], [[TMP44]] +// CHECK1-NEXT: [[TMP46:%.*]] = sdiv exact i64 [[TMP45]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP47:%.*]] = add nuw i64 [[TMP46]], 1 +// CHECK1-NEXT: [[TMP48:%.*]] = mul nuw i64 [[TMP47]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP48]], i64* [[TMP49]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 3 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init..1 to i8*), i8** [[TMP50]], align 8 +// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 4 +// CHECK1-NEXT: store i8* null, i8** [[TMP51]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 5 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb..2 to i8*), i8** [[TMP52]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 6 +// CHECK1-NEXT: store i32 1, i32* [[TMP53]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, i32* [[TMP54]], align 4 +// CHECK1-NEXT: [[TMP56:%.*]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]] to i8* +// CHECK1-NEXT: [[TMP57:%.*]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @[[GLOB1]], i32 [[TMP55]], i32 1, i32 2, i8* [[TMP56]]) +// CHECK1-NEXT: store i8* [[TMP57]], i8** [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = load i32, i32* [[TMP58]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP59]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP60:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP61:%.*]] = icmp slt i32 [[TMP60]], 0 +// CHECK1-NEXT: [[TMP62:%.*]] = select i1 [[TMP61]], i32 [[TMP60]], i32 0 +// CHECK1-NEXT: store i32 [[TMP62]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP63:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP63]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP61:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP62:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP61]], [[TMP62]] +// CHECK1-NEXT: [[TMP64:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP65:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP64]], [[TMP65]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP63:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP63]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP66:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP66]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store i8** [[DOTTASK_RED_]], i8*** [[TMP64]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[ARGC1]], i32** [[TMP65]], align 8 -// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP67:%.*]] = load i8**, i8*** [[TMP]], align 8 -// CHECK1-NEXT: store i8** [[TMP67]], i8*** [[TMP66]], align 8 -// CHECK1-NEXT: [[TMP68:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP69:%.*]] = load i32, i32* [[TMP68]], align 4 -// CHECK1-NEXT: [[TMP70:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP69]], i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP71:%.*]] = bitcast i8* [[TMP70]] to %struct.kmp_task_t_with_privates* -// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP71]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP72]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP74:%.*]] = load i8*, i8** [[TMP73]], align 8 -// CHECK1-NEXT: [[TMP75:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP74]], i8* align 8 [[TMP75]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP71]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP76]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP78:%.*]] = load i8*, i8** [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: store i8* [[TMP78]], i8** [[TMP77]], align 8 -// CHECK1-NEXT: [[TMP79:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP80:%.*]] = load i32, i32* [[TMP79]], align 4 -// CHECK1-NEXT: [[TMP81:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP80]], i8* [[TMP70]]) +// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store i8** [[DOTTASK_RED_]], i8*** [[TMP67]], align 8 +// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[ARGC]], i32** [[TMP68]], align 8 +// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP70:%.*]] = load i8**, i8*** [[TMP]], align 8 +// CHECK1-NEXT: store i8** [[TMP70]], i8*** [[TMP69]], align 8 +// CHECK1-NEXT: [[TMP71:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP72:%.*]] = load i32, i32* [[TMP71]], align 4 +// CHECK1-NEXT: [[TMP73:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP72]], i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP74:%.*]] = bitcast i8* [[TMP73]] to %struct.kmp_task_t_with_privates* +// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP74]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP75]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP77:%.*]] = load i8*, i8** [[TMP76]], align 8 +// CHECK1-NEXT: [[TMP78:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP77]], i8* align 8 [[TMP78]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP74]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP79]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP81:%.*]] = load i8*, i8** [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: store i8* [[TMP81]], i8** [[TMP80]], align 8 +// CHECK1-NEXT: [[TMP82:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP83:%.*]] = load i32, i32* [[TMP82]], align 4 +// CHECK1-NEXT: [[TMP84:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP83]], i8* [[TMP73]]) // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP82:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP82]], 1 +// CHECK1-NEXT: [[TMP85:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP85]], 1 // CHECK1-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: [[TMP83:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP84:%.*]] = load i32, i32* [[TMP83]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP84]]) -// CHECK1-NEXT: [[TMP85:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP86:%.*]] = load i32, i32* [[TMP85]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP86]], i32 1) -// CHECK1-NEXT: [[TMP87:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP88:%.*]] = bitcast i32* [[ARGC1]] to i8* -// CHECK1-NEXT: store i8* [[TMP88]], i8** [[TMP87]], align 8 -// CHECK1-NEXT: [[TMP89:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP89]], align 8 -// CHECK1-NEXT: [[TMP90:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP91:%.*]] = inttoptr i64 [[TMP11]] to i8* +// CHECK1-NEXT: [[TMP86:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP87:%.*]] = load i32, i32* [[TMP86]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP87]]) +// CHECK1-NEXT: [[TMP88:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP89:%.*]] = load i32, i32* [[TMP88]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP89]], i32 1) +// CHECK1-NEXT: [[TMP90:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP91:%.*]] = bitcast i32* [[ARGC]] to i8* // CHECK1-NEXT: store i8* [[TMP91]], i8** [[TMP90]], align 8 -// CHECK1-NEXT: [[TMP92:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP93:%.*]] = load i32, i32* [[TMP92]], align 4 -// CHECK1-NEXT: [[TMP94:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP95:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP93]], i32 2, i64 24, i8* [[TMP94]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP95]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP92:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP92]], align 8 +// CHECK1-NEXT: [[TMP93:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP94:%.*]] = inttoptr i64 [[TMP14]] to i8* +// CHECK1-NEXT: store i8* [[TMP94]], i8** [[TMP93]], align 8 +// CHECK1-NEXT: [[TMP95:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP96:%.*]] = load i32, i32* [[TMP95]], align 4 +// CHECK1-NEXT: [[TMP97:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP98:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP96]], i32 2, i64 24, i8* [[TMP97]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP98]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP96:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP97:%.*]] = load i32, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP96]], [[TMP97]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP98:%.*]] = getelementptr i8, i8* [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[ARRAYIDX2]], [[TMP98]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP99:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP100:%.*]] = load i32, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP99]], [[TMP100]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP101:%.*]] = getelementptr i8, i8* [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[ARRAYIDX1]], [[TMP101]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST12:%.*]] = phi i8* [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP99:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP99]] to i32 -// CHECK1-NEXT: [[TMP100:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP100]] to i32 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV]], [[CONV13]] -// CHECK1-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK1-NEXT: store i8 [[CONV15]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST12]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST11:%.*]] = phi i8* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP102:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP102]] to i32 +// CHECK1-NEXT: [[TMP103:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV12:%.*]] = sext i8 [[TMP103]] to i32 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV]], [[CONV12]] +// CHECK1-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i8 +// CHECK1-NEXT: store i8 [[CONV14]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP98]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done18: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP93]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP101]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done17: +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP96]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP101:%.*]] = load i32, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP102:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP101]] monotonic, align 4 -// CHECK1-NEXT: [[TMP103:%.*]] = getelementptr i8, i8* [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq i8* [[ARRAYIDX2]], [[TMP103]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY19]], label [[OMP_ARRAYCPY_DONE32:%.*]], label [[OMP_ARRAYCPY_BODY20:%.*]] -// CHECK1: omp.arraycpy.body20: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST21:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT30:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST22:%.*]] = phi i8* [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT29:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP104:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1 -// CHECK1-NEXT: [[CONV23:%.*]] = sext i8 [[TMP104]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST22]] monotonic, align 1 +// CHECK1-NEXT: [[TMP104:%.*]] = load i32, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP105:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP104]] monotonic, align 4 +// CHECK1-NEXT: [[TMP106:%.*]] = getelementptr i8, i8* [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY18:%.*]] = icmp eq i8* [[ARRAYIDX1]], [[TMP106]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY18]], label [[OMP_ARRAYCPY_DONE31:%.*]], label [[OMP_ARRAYCPY_BODY19:%.*]] +// CHECK1: omp.arraycpy.body19: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST20:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT29:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST21:%.*]] = phi i8* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT28:%.*]], [[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[TMP107:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 1 +// CHECK1-NEXT: [[CONV22:%.*]] = sext i8 [[TMP107]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST21]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP105:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY20]] ], [ [[TMP110:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP105]], i8* [[_TMP24]], align 1 -// CHECK1-NEXT: [[TMP106:%.*]] = load i8, i8* [[_TMP24]], align 1 -// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP106]] to i32 -// CHECK1-NEXT: [[TMP107:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1 -// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP107]] to i32 -// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[CONV25]], [[CONV26]] -// CHECK1-NEXT: [[CONV28:%.*]] = trunc i32 [[ADD27]] to i8 -// CHECK1-NEXT: store i8 [[CONV28]], i8* [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP108:%.*]] = load i8, i8* [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP109:%.*]] = cmpxchg i8* [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i8 [[TMP105]], i8 [[TMP108]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP110]] = extractvalue { i8, i1 } [[TMP109]], 0 -// CHECK1-NEXT: [[TMP111:%.*]] = extractvalue { i8, i1 } [[TMP109]], 1 -// CHECK1-NEXT: br i1 [[TMP111]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP108:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY19]] ], [ [[TMP113:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP108]], i8* [[_TMP23]], align 1 +// CHECK1-NEXT: [[TMP109:%.*]] = load i8, i8* [[_TMP23]], align 1 +// CHECK1-NEXT: [[CONV24:%.*]] = sext i8 [[TMP109]] to i32 +// CHECK1-NEXT: [[TMP110:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 1 +// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP110]] to i32 +// CHECK1-NEXT: [[ADD26:%.*]] = add nsw i32 [[CONV24]], [[CONV25]] +// CHECK1-NEXT: [[CONV27:%.*]] = trunc i32 [[ADD26]] to i8 +// CHECK1-NEXT: store i8 [[CONV27]], i8* [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP111:%.*]] = load i8, i8* [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP112:%.*]] = cmpxchg i8* [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i8 [[TMP108]], i8 [[TMP111]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP113]] = extractvalue { i8, i1 } [[TMP112]], 0 +// CHECK1-NEXT: [[TMP114:%.*]] = extractvalue { i8, i1 } [[TMP112]], 1 +// CHECK1-NEXT: br i1 [[TMP114]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT29]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT30]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE31:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT29]], [[TMP103]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_DONE32]], label [[OMP_ARRAYCPY_BODY20]] -// CHECK1: omp.arraycpy.done32: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT28]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT29]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST20]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE30:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT28]], [[TMP106]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE30]], label [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_BODY19]] +// CHECK1: omp.arraycpy.done31: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP112:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP112]]) +// CHECK1-NEXT: [[TMP115:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP115]]) // CHECK1-NEXT: ret void // // @@ -422,7 +428,7 @@ // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[TMP_I:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[TMP4_I:%.*]] = alloca i8*, align 8 @@ -436,7 +442,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -449,29 +455,29 @@ // CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* // CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5:[0-9]+]] // CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* // CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP24:%.*]] = load i8**, i8*** [[TMP23]], align 8 // CHECK1-NEXT: [[TMP25:%.*]] = load i8*, i8** [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP26]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 // CHECK1-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP29]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP31:%.*]] = load i8**, i8*** [[TMP30]], align 8 // CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds i8*, i8** [[TMP31]], i64 9 // CHECK1-NEXT: [[TMP32:%.*]] = load i8*, i8** [[ARRAYIDX2_I]], align 8 @@ -482,10 +488,10 @@ // CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 // CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8 +// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8, !noalias !12 // CHECK1-NEXT: [[TMP39:%.*]] = load i8*, i8** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP40:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP39]], i8* [[TMP25]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP42:%.*]] = load i8**, i8*** [[TMP41]], align 8 // CHECK1-NEXT: [[TMP43:%.*]] = load i8*, i8** [[TMP42]], align 8 // CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint i8* [[TMP43]] to i64 diff --git a/clang/test/OpenMP/reduction_compound_op.cpp b/clang/test/OpenMP/reduction_compound_op.cpp --- a/clang/test/OpenMP/reduction_compound_op.cpp +++ b/clang/test/OpenMP/reduction_compound_op.cpp @@ -18,7 +18,6 @@ //RUN: %clang_cc1 -no-opaque-pointers -no-enable-noundef-analysis -triple x86_64-unknown-linux-gnu -fopenmp-simd -DNORM -DCOMP \ //RUN: -emit-llvm -o - %s | FileCheck %s --check-prefix SIMD-ONLY -// SIMD-ONLY-NOT: {{__kmpc|__tgt}} struct Point { int x = 0; @@ -85,17 +84,73 @@ // NORM-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // NORM-NEXT: [[POINTS_ADDR:%.*]] = alloca %struct.Point*, align 8 // NORM-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// NORM-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// NORM-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// NORM-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// NORM-NEXT: [[OMP_OUTLINED_ARG_AGG_3:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// NORM-NEXT: [[OMP_OUTLINED_ARG_AGG_4:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// NORM-NEXT: [[OMP_OUTLINED_ARG_AGG_5:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 +// NORM-NEXT: [[OMP_OUTLINED_ARG_AGG_6:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 +// NORM-NEXT: [[OMP_OUTLINED_ARG_AGG_7:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // NORM-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // NORM-NEXT: store %struct.Point* [[POINTS]], %struct.Point** [[POINTS_ADDR]], align 8 // NORM-NEXT: call void @_ZN5PointC1Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4:[0-9]+]] -// NORM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.Point*, %struct.Point**)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[N_ADDR]], %struct.Point* [[RED]], %struct.Point** [[POINTS_ADDR]]) -// NORM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.Point*, %struct.Point**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], %struct.Point* [[RED]], %struct.Point** [[POINTS_ADDR]]) -// NORM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.Point*, %struct.Point**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], %struct.Point* [[RED]], %struct.Point** [[POINTS_ADDR]]) -// NORM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.Point*, %struct.Point**)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], %struct.Point* [[RED]], %struct.Point** [[POINTS_ADDR]]) -// NORM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.Point*, %struct.Point**)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], %struct.Point* [[RED]], %struct.Point** [[POINTS_ADDR]]) -// NORM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.Point*, %struct.Point**)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], %struct.Point* [[RED]], %struct.Point** [[POINTS_ADDR]]) -// NORM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.Point*, %struct.Point**)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], %struct.Point* [[RED]], %struct.Point** [[POINTS_ADDR]]) -// NORM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.Point*, %struct.Point**)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], %struct.Point* [[RED]], %struct.Point** [[POINTS_ADDR]]) +// NORM-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// NORM-NEXT: store i32* [[N_ADDR]], i32** [[TMP0]], align 8 +// NORM-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// NORM-NEXT: store %struct.Point* [[RED]], %struct.Point** [[TMP1]], align 8 +// NORM-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// NORM-NEXT: store %struct.Point** [[POINTS_ADDR]], %struct.Point*** [[TMP2]], align 8 +// NORM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) +// NORM-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0 +// NORM-NEXT: store i32* [[N_ADDR]], i32** [[TMP3]], align 8 +// NORM-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 1 +// NORM-NEXT: store %struct.Point* [[RED]], %struct.Point** [[TMP4]], align 8 +// NORM-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 2 +// NORM-NEXT: store %struct.Point** [[POINTS_ADDR]], %struct.Point*** [[TMP5]], align 8 +// NORM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]]) +// NORM-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 0 +// NORM-NEXT: store i32* [[N_ADDR]], i32** [[TMP6]], align 8 +// NORM-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 1 +// NORM-NEXT: store %struct.Point* [[RED]], %struct.Point** [[TMP7]], align 8 +// NORM-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 2 +// NORM-NEXT: store %struct.Point** [[POINTS_ADDR]], %struct.Point*** [[TMP8]], align 8 +// NORM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_2]]) +// NORM-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_3]], i32 0, i32 0 +// NORM-NEXT: store i32* [[N_ADDR]], i32** [[TMP9]], align 8 +// NORM-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_3]], i32 0, i32 1 +// NORM-NEXT: store %struct.Point* [[RED]], %struct.Point** [[TMP10]], align 8 +// NORM-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_3]], i32 0, i32 2 +// NORM-NEXT: store %struct.Point** [[POINTS_ADDR]], %struct.Point*** [[TMP11]], align 8 +// NORM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_3]]) +// NORM-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_4]], i32 0, i32 0 +// NORM-NEXT: store i32* [[N_ADDR]], i32** [[TMP12]], align 8 +// NORM-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_4]], i32 0, i32 1 +// NORM-NEXT: store %struct.Point* [[RED]], %struct.Point** [[TMP13]], align 8 +// NORM-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_4]], i32 0, i32 2 +// NORM-NEXT: store %struct.Point** [[POINTS_ADDR]], %struct.Point*** [[TMP14]], align 8 +// NORM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_4]]) +// NORM-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 0 +// NORM-NEXT: store i32* [[N_ADDR]], i32** [[TMP15]], align 8 +// NORM-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 1 +// NORM-NEXT: store %struct.Point* [[RED]], %struct.Point** [[TMP16]], align 8 +// NORM-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 2 +// NORM-NEXT: store %struct.Point** [[POINTS_ADDR]], %struct.Point*** [[TMP17]], align 8 +// NORM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]]) +// NORM-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_6]], i32 0, i32 0 +// NORM-NEXT: store i32* [[N_ADDR]], i32** [[TMP18]], align 8 +// NORM-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_6]], i32 0, i32 1 +// NORM-NEXT: store %struct.Point* [[RED]], %struct.Point** [[TMP19]], align 8 +// NORM-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_6]], i32 0, i32 2 +// NORM-NEXT: store %struct.Point** [[POINTS_ADDR]], %struct.Point*** [[TMP20]], align 8 +// NORM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_6]]) +// NORM-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_7]], i32 0, i32 0 +// NORM-NEXT: store i32* [[N_ADDR]], i32** [[TMP21]], align 8 +// NORM-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_7]], i32 0, i32 1 +// NORM-NEXT: store %struct.Point* [[RED]], %struct.Point** [[TMP22]], align 8 +// NORM-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_7]], i32 0, i32 2 +// NORM-NEXT: store %struct.Point** [[POINTS_ADDR]], %struct.Point*** [[TMP23]], align 8 +// NORM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_7]]) // NORM-NEXT: ret void // // @@ -110,13 +165,11 @@ // // // NORM-LABEL: define {{[^@]+}}@.omp_outlined. -// NORM-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED:%.*]], %struct.Point** nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2:[0-9]+]] { +// NORM-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // NORM-NEXT: entry: // NORM-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // NORM-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// NORM-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// NORM-NEXT: [[RED_ADDR:%.*]] = alloca %struct.Point*, align 8 -// NORM-NEXT: [[POINTS_ADDR:%.*]] = alloca %struct.Point**, align 8 +// NORM-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // NORM-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // NORM-NEXT: [[TMP:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -126,111 +179,113 @@ // NORM-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// NORM-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// NORM-NEXT: [[I4:%.*]] = alloca i32, align 4 +// NORM-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// NORM-NEXT: [[I3:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // NORM-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4 -// NORM-NEXT: [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4 +// NORM-NEXT: [[REF_TMP9:%.*]] = alloca [[STRUCT_POINT]], align 4 // NORM-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// NORM-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// NORM-NEXT: store %struct.Point* [[RED]], %struct.Point** [[RED_ADDR]], align 8 -// NORM-NEXT: store %struct.Point** [[POINTS]], %struct.Point*** [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// NORM-NEXT: [[TMP1:%.*]] = load %struct.Point*, %struct.Point** [[RED_ADDR]], align 8 -// NORM-NEXT: [[TMP2:%.*]] = load %struct.Point**, %struct.Point*** [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// NORM-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// NORM-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// NORM-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// NORM-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// NORM-NEXT: [[TMP4:%.*]] = load %struct.Point*, %struct.Point** [[TMP3]], align 8 +// NORM-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// NORM-NEXT: [[TMP6:%.*]] = load %struct.Point**, %struct.Point*** [[TMP5]], align 8 +// NORM-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// NORM-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // NORM-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // NORM-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // NORM-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: store i32 0, i32* [[I]], align 4 -// NORM-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// NORM-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // NORM-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // NORM: omp.precond.then: // NORM-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// NORM-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // NORM-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // NORM-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// NORM-NEXT: call void @_ZN5PointC1Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// NORM-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// NORM-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// NORM-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// NORM-NEXT: call void @_ZN5PointC1Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// NORM-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// NORM-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// NORM-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// NORM-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // NORM: cond.true: -// NORM-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: br label [[COND_END:%.*]] // NORM: cond.false: -// NORM-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // NORM-NEXT: br label [[COND_END]] // NORM: cond.end: -// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // NORM-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// NORM-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// NORM-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // NORM: omp.inner.for.cond: -// NORM-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// NORM-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// NORM-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// NORM-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// NORM-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// NORM-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // NORM: omp.inner.for.body: -// NORM-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// NORM-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// NORM-NEXT: store i32 [[ADD7]], i32* [[I4]], align 4 -// NORM-NEXT: [[TMP17:%.*]] = load i32, i32* [[I4]], align 4 -// NORM-NEXT: [[TMP18:%.*]] = load %struct.Point*, %struct.Point** [[TMP2]], align 8 -// NORM-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], %struct.Point* [[TMP18]]) +// NORM-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// NORM-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// NORM-NEXT: store i32 [[ADD6]], i32* [[I3]], align 4 +// NORM-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// NORM-NEXT: [[TMP22:%.*]] = load %struct.Point*, %struct.Point** [[TMP6]], align 8 +// NORM-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], %struct.Point* [[TMP22]]) // NORM-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // NORM: omp.body.continue: // NORM-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // NORM: omp.inner.for.inc: -// NORM-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// NORM-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// NORM-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND]] // NORM: omp.inner.for.end: // NORM-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // NORM: omp.loop.exit: -// NORM-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// NORM-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// NORM-NEXT: [[TMP23:%.*]] = bitcast %struct.Point* [[RED3]] to i8* -// NORM-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 // NORM-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// NORM-NEXT: [[TMP26:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// NORM-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 1, i64 8, i8* [[TMP26]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// NORM-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// NORM-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) +// NORM-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// NORM-NEXT: [[TMP27:%.*]] = bitcast %struct.Point* [[RED]] to i8* +// NORM-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 +// NORM-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// NORM-NEXT: [[TMP30:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// NORM-NEXT: [[TMP31:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP29]], i32 1, i64 8, i8* [[TMP30]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// NORM-NEXT: switch i32 [[TMP31]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // NORM-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // NORM-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // NORM-NEXT: ] // NORM: .omp.reduction.case1: -// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointplERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) -// NORM-NEXT: [[TMP28:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64* -// NORM-NEXT: store i64 [[CALL]], i64* [[TMP28]], align 4 -// NORM-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]]) -// NORM-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointplERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) +// NORM-NEXT: [[TMP32:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64* +// NORM-NEXT: store i64 [[CALL]], i64* [[TMP32]], align 4 +// NORM-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// NORM-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.case2: -// NORM-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// NORM-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// NORM-NEXT: [[CALL11:%.*]] = call i64 @_ZNK5PointplERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) -// NORM-NEXT: [[TMP31:%.*]] = bitcast %struct.Point* [[REF_TMP10]] to i64* -// NORM-NEXT: store i64 [[CALL11]], i64* [[TMP31]], align 4 -// NORM-NEXT: [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP10]]) -// NORM-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// NORM-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP34]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[CALL10:%.*]] = call i64 @_ZNK5PointplERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) +// NORM-NEXT: [[TMP35:%.*]] = bitcast %struct.Point* [[REF_TMP9]] to i64* +// NORM-NEXT: store i64 [[CALL10]], i64* [[TMP35]], align 4 +// NORM-NEXT: [[CALL11:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP9]]) +// NORM-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP34]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.default: // NORM-NEXT: br label [[OMP_PRECOND_END]] @@ -264,13 +319,11 @@ // // // NORM-LABEL: define {{[^@]+}}@.omp_outlined..1 -// NORM-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED:%.*]], %struct.Point** nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// NORM-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // NORM-NEXT: entry: // NORM-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // NORM-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// NORM-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// NORM-NEXT: [[RED_ADDR:%.*]] = alloca %struct.Point*, align 8 -// NORM-NEXT: [[POINTS_ADDR:%.*]] = alloca %struct.Point**, align 8 +// NORM-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // NORM-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // NORM-NEXT: [[TMP:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -280,111 +333,113 @@ // NORM-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// NORM-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// NORM-NEXT: [[I4:%.*]] = alloca i32, align 4 +// NORM-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// NORM-NEXT: [[I3:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // NORM-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4 -// NORM-NEXT: [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4 +// NORM-NEXT: [[REF_TMP9:%.*]] = alloca [[STRUCT_POINT]], align 4 // NORM-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// NORM-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// NORM-NEXT: store %struct.Point* [[RED]], %struct.Point** [[RED_ADDR]], align 8 -// NORM-NEXT: store %struct.Point** [[POINTS]], %struct.Point*** [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// NORM-NEXT: [[TMP1:%.*]] = load %struct.Point*, %struct.Point** [[RED_ADDR]], align 8 -// NORM-NEXT: [[TMP2:%.*]] = load %struct.Point**, %struct.Point*** [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// NORM-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// NORM-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// NORM-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// NORM-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// NORM-NEXT: [[TMP4:%.*]] = load %struct.Point*, %struct.Point** [[TMP3]], align 8 +// NORM-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// NORM-NEXT: [[TMP6:%.*]] = load %struct.Point**, %struct.Point*** [[TMP5]], align 8 +// NORM-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// NORM-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // NORM-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // NORM-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // NORM-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: store i32 0, i32* [[I]], align 4 -// NORM-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// NORM-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // NORM-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // NORM: omp.precond.then: // NORM-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// NORM-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // NORM-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // NORM-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// NORM-NEXT: call void @_ZN5PointC1Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// NORM-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// NORM-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// NORM-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// NORM-NEXT: call void @_ZN5PointC1Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// NORM-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// NORM-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// NORM-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// NORM-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // NORM: cond.true: -// NORM-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: br label [[COND_END:%.*]] // NORM: cond.false: -// NORM-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // NORM-NEXT: br label [[COND_END]] // NORM: cond.end: -// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // NORM-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// NORM-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// NORM-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // NORM: omp.inner.for.cond: -// NORM-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// NORM-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// NORM-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// NORM-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// NORM-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// NORM-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // NORM: omp.inner.for.body: -// NORM-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// NORM-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// NORM-NEXT: store i32 [[ADD7]], i32* [[I4]], align 4 -// NORM-NEXT: [[TMP17:%.*]] = load i32, i32* [[I4]], align 4 -// NORM-NEXT: [[TMP18:%.*]] = load %struct.Point*, %struct.Point** [[TMP2]], align 8 -// NORM-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], %struct.Point* [[TMP18]]) +// NORM-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// NORM-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// NORM-NEXT: store i32 [[ADD6]], i32* [[I3]], align 4 +// NORM-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// NORM-NEXT: [[TMP22:%.*]] = load %struct.Point*, %struct.Point** [[TMP6]], align 8 +// NORM-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], %struct.Point* [[TMP22]]) // NORM-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // NORM: omp.body.continue: // NORM-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // NORM: omp.inner.for.inc: -// NORM-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// NORM-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// NORM-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND]] // NORM: omp.inner.for.end: // NORM-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // NORM: omp.loop.exit: -// NORM-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// NORM-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// NORM-NEXT: [[TMP23:%.*]] = bitcast %struct.Point* [[RED3]] to i8* -// NORM-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 // NORM-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// NORM-NEXT: [[TMP26:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// NORM-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, i8* [[TMP26]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) -// NORM-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// NORM-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) +// NORM-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// NORM-NEXT: [[TMP27:%.*]] = bitcast %struct.Point* [[RED]] to i8* +// NORM-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 +// NORM-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// NORM-NEXT: [[TMP30:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// NORM-NEXT: [[TMP31:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]], i32 1, i64 8, i8* [[TMP30]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) +// NORM-NEXT: switch i32 [[TMP31]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // NORM-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // NORM-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // NORM-NEXT: ] // NORM: .omp.reduction.case1: -// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointplERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) -// NORM-NEXT: [[TMP28:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64* -// NORM-NEXT: store i64 [[CALL]], i64* [[TMP28]], align 4 -// NORM-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]]) -// NORM-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointplERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) +// NORM-NEXT: [[TMP32:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64* +// NORM-NEXT: store i64 [[CALL]], i64* [[TMP32]], align 4 +// NORM-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// NORM-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.case2: -// NORM-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// NORM-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// NORM-NEXT: [[CALL11:%.*]] = call i64 @_ZNK5PointplERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) -// NORM-NEXT: [[TMP31:%.*]] = bitcast %struct.Point* [[REF_TMP10]] to i64* -// NORM-NEXT: store i64 [[CALL11]], i64* [[TMP31]], align 4 -// NORM-NEXT: [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP10]]) -// NORM-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// NORM-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP34]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[CALL10:%.*]] = call i64 @_ZNK5PointplERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) +// NORM-NEXT: [[TMP35:%.*]] = bitcast %struct.Point* [[REF_TMP9]] to i64* +// NORM-NEXT: store i64 [[CALL10]], i64* [[TMP35]], align 4 +// NORM-NEXT: [[CALL11:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP9]]) +// NORM-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP34]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.default: // NORM-NEXT: br label [[OMP_PRECOND_END]] @@ -418,13 +473,11 @@ // // // NORM-LABEL: define {{[^@]+}}@.omp_outlined..3 -// NORM-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED:%.*]], %struct.Point** nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// NORM-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // NORM-NEXT: entry: // NORM-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // NORM-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// NORM-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// NORM-NEXT: [[RED_ADDR:%.*]] = alloca %struct.Point*, align 8 -// NORM-NEXT: [[POINTS_ADDR:%.*]] = alloca %struct.Point**, align 8 +// NORM-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // NORM-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // NORM-NEXT: [[TMP:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -434,111 +487,113 @@ // NORM-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// NORM-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// NORM-NEXT: [[I4:%.*]] = alloca i32, align 4 +// NORM-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// NORM-NEXT: [[I3:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // NORM-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4 -// NORM-NEXT: [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4 +// NORM-NEXT: [[REF_TMP9:%.*]] = alloca [[STRUCT_POINT]], align 4 // NORM-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// NORM-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// NORM-NEXT: store %struct.Point* [[RED]], %struct.Point** [[RED_ADDR]], align 8 -// NORM-NEXT: store %struct.Point** [[POINTS]], %struct.Point*** [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// NORM-NEXT: [[TMP1:%.*]] = load %struct.Point*, %struct.Point** [[RED_ADDR]], align 8 -// NORM-NEXT: [[TMP2:%.*]] = load %struct.Point**, %struct.Point*** [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// NORM-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// NORM-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// NORM-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// NORM-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// NORM-NEXT: [[TMP4:%.*]] = load %struct.Point*, %struct.Point** [[TMP3]], align 8 +// NORM-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// NORM-NEXT: [[TMP6:%.*]] = load %struct.Point**, %struct.Point*** [[TMP5]], align 8 +// NORM-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// NORM-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // NORM-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // NORM-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // NORM-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: store i32 0, i32* [[I]], align 4 -// NORM-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// NORM-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // NORM-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // NORM: omp.precond.then: // NORM-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// NORM-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // NORM-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // NORM-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// NORM-NEXT: call void @_ZN5PointC1Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// NORM-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// NORM-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// NORM-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// NORM-NEXT: call void @_ZN5PointC1Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// NORM-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// NORM-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// NORM-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// NORM-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // NORM: cond.true: -// NORM-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: br label [[COND_END:%.*]] // NORM: cond.false: -// NORM-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // NORM-NEXT: br label [[COND_END]] // NORM: cond.end: -// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // NORM-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// NORM-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// NORM-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // NORM: omp.inner.for.cond: -// NORM-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// NORM-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// NORM-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// NORM-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// NORM-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// NORM-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // NORM: omp.inner.for.body: -// NORM-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// NORM-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// NORM-NEXT: store i32 [[ADD7]], i32* [[I4]], align 4 -// NORM-NEXT: [[TMP17:%.*]] = load i32, i32* [[I4]], align 4 -// NORM-NEXT: [[TMP18:%.*]] = load %struct.Point*, %struct.Point** [[TMP2]], align 8 -// NORM-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], %struct.Point* [[TMP18]]) +// NORM-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// NORM-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// NORM-NEXT: store i32 [[ADD6]], i32* [[I3]], align 4 +// NORM-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// NORM-NEXT: [[TMP22:%.*]] = load %struct.Point*, %struct.Point** [[TMP6]], align 8 +// NORM-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], %struct.Point* [[TMP22]]) // NORM-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // NORM: omp.body.continue: // NORM-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // NORM: omp.inner.for.inc: -// NORM-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// NORM-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// NORM-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND]] // NORM: omp.inner.for.end: // NORM-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // NORM: omp.loop.exit: -// NORM-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// NORM-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// NORM-NEXT: [[TMP23:%.*]] = bitcast %struct.Point* [[RED3]] to i8* -// NORM-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 // NORM-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// NORM-NEXT: [[TMP26:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// NORM-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, i8* [[TMP26]], void (i8*, i8*)* @.omp.reduction.reduction_func.4, [8 x i32]* @.gomp_critical_user_.reduction.var) -// NORM-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// NORM-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) +// NORM-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// NORM-NEXT: [[TMP27:%.*]] = bitcast %struct.Point* [[RED]] to i8* +// NORM-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 +// NORM-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// NORM-NEXT: [[TMP30:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// NORM-NEXT: [[TMP31:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]], i32 1, i64 8, i8* [[TMP30]], void (i8*, i8*)* @.omp.reduction.reduction_func.4, [8 x i32]* @.gomp_critical_user_.reduction.var) +// NORM-NEXT: switch i32 [[TMP31]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // NORM-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // NORM-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // NORM-NEXT: ] // NORM: .omp.reduction.case1: -// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointmlERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) -// NORM-NEXT: [[TMP28:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64* -// NORM-NEXT: store i64 [[CALL]], i64* [[TMP28]], align 4 -// NORM-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]]) -// NORM-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointmlERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) +// NORM-NEXT: [[TMP32:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64* +// NORM-NEXT: store i64 [[CALL]], i64* [[TMP32]], align 4 +// NORM-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// NORM-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.case2: -// NORM-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// NORM-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// NORM-NEXT: [[CALL11:%.*]] = call i64 @_ZNK5PointmlERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) -// NORM-NEXT: [[TMP31:%.*]] = bitcast %struct.Point* [[REF_TMP10]] to i64* -// NORM-NEXT: store i64 [[CALL11]], i64* [[TMP31]], align 4 -// NORM-NEXT: [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP10]]) -// NORM-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// NORM-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP34]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[CALL10:%.*]] = call i64 @_ZNK5PointmlERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) +// NORM-NEXT: [[TMP35:%.*]] = bitcast %struct.Point* [[REF_TMP9]] to i64* +// NORM-NEXT: store i64 [[CALL10]], i64* [[TMP35]], align 4 +// NORM-NEXT: [[CALL11:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP9]]) +// NORM-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP34]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.default: // NORM-NEXT: br label [[OMP_PRECOND_END]] @@ -572,13 +627,11 @@ // // // NORM-LABEL: define {{[^@]+}}@.omp_outlined..5 -// NORM-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED:%.*]], %struct.Point** nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// NORM-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // NORM-NEXT: entry: // NORM-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // NORM-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// NORM-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// NORM-NEXT: [[RED_ADDR:%.*]] = alloca %struct.Point*, align 8 -// NORM-NEXT: [[POINTS_ADDR:%.*]] = alloca %struct.Point**, align 8 +// NORM-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // NORM-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // NORM-NEXT: [[TMP:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -588,111 +641,113 @@ // NORM-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// NORM-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// NORM-NEXT: [[I4:%.*]] = alloca i32, align 4 +// NORM-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// NORM-NEXT: [[I3:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // NORM-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4 -// NORM-NEXT: [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4 +// NORM-NEXT: [[REF_TMP9:%.*]] = alloca [[STRUCT_POINT]], align 4 // NORM-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// NORM-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// NORM-NEXT: store %struct.Point* [[RED]], %struct.Point** [[RED_ADDR]], align 8 -// NORM-NEXT: store %struct.Point** [[POINTS]], %struct.Point*** [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// NORM-NEXT: [[TMP1:%.*]] = load %struct.Point*, %struct.Point** [[RED_ADDR]], align 8 -// NORM-NEXT: [[TMP2:%.*]] = load %struct.Point**, %struct.Point*** [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// NORM-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// NORM-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// NORM-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// NORM-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// NORM-NEXT: [[TMP4:%.*]] = load %struct.Point*, %struct.Point** [[TMP3]], align 8 +// NORM-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// NORM-NEXT: [[TMP6:%.*]] = load %struct.Point**, %struct.Point*** [[TMP5]], align 8 +// NORM-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// NORM-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // NORM-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // NORM-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // NORM-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: store i32 0, i32* [[I]], align 4 -// NORM-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// NORM-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // NORM-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // NORM: omp.precond.then: // NORM-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// NORM-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // NORM-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // NORM-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// NORM-NEXT: call void @_ZN5PointC1Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// NORM-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// NORM-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// NORM-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// NORM-NEXT: call void @_ZN5PointC1Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// NORM-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// NORM-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// NORM-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// NORM-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // NORM: cond.true: -// NORM-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: br label [[COND_END:%.*]] // NORM: cond.false: -// NORM-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // NORM-NEXT: br label [[COND_END]] // NORM: cond.end: -// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // NORM-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// NORM-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// NORM-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // NORM: omp.inner.for.cond: -// NORM-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// NORM-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// NORM-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// NORM-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// NORM-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// NORM-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // NORM: omp.inner.for.body: -// NORM-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// NORM-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// NORM-NEXT: store i32 [[ADD7]], i32* [[I4]], align 4 -// NORM-NEXT: [[TMP17:%.*]] = load i32, i32* [[I4]], align 4 -// NORM-NEXT: [[TMP18:%.*]] = load %struct.Point*, %struct.Point** [[TMP2]], align 8 -// NORM-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], %struct.Point* [[TMP18]]) +// NORM-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// NORM-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// NORM-NEXT: store i32 [[ADD6]], i32* [[I3]], align 4 +// NORM-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// NORM-NEXT: [[TMP22:%.*]] = load %struct.Point*, %struct.Point** [[TMP6]], align 8 +// NORM-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], %struct.Point* [[TMP22]]) // NORM-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // NORM: omp.body.continue: // NORM-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // NORM: omp.inner.for.inc: -// NORM-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// NORM-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// NORM-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND]] // NORM: omp.inner.for.end: // NORM-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // NORM: omp.loop.exit: -// NORM-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// NORM-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// NORM-NEXT: [[TMP23:%.*]] = bitcast %struct.Point* [[RED3]] to i8* -// NORM-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 // NORM-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// NORM-NEXT: [[TMP26:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// NORM-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, i8* [[TMP26]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var) -// NORM-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// NORM-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) +// NORM-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// NORM-NEXT: [[TMP27:%.*]] = bitcast %struct.Point* [[RED]] to i8* +// NORM-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 +// NORM-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// NORM-NEXT: [[TMP30:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// NORM-NEXT: [[TMP31:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]], i32 1, i64 8, i8* [[TMP30]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var) +// NORM-NEXT: switch i32 [[TMP31]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // NORM-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // NORM-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // NORM-NEXT: ] // NORM: .omp.reduction.case1: -// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointanERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) -// NORM-NEXT: [[TMP28:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64* -// NORM-NEXT: store i64 [[CALL]], i64* [[TMP28]], align 4 -// NORM-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]]) -// NORM-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointanERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) +// NORM-NEXT: [[TMP32:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64* +// NORM-NEXT: store i64 [[CALL]], i64* [[TMP32]], align 4 +// NORM-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// NORM-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.case2: -// NORM-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// NORM-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// NORM-NEXT: [[CALL11:%.*]] = call i64 @_ZNK5PointanERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) -// NORM-NEXT: [[TMP31:%.*]] = bitcast %struct.Point* [[REF_TMP10]] to i64* -// NORM-NEXT: store i64 [[CALL11]], i64* [[TMP31]], align 4 -// NORM-NEXT: [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP10]]) -// NORM-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// NORM-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP34]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[CALL10:%.*]] = call i64 @_ZNK5PointanERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) +// NORM-NEXT: [[TMP35:%.*]] = bitcast %struct.Point* [[REF_TMP9]] to i64* +// NORM-NEXT: store i64 [[CALL10]], i64* [[TMP35]], align 4 +// NORM-NEXT: [[CALL11:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP9]]) +// NORM-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP34]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.default: // NORM-NEXT: br label [[OMP_PRECOND_END]] @@ -726,13 +781,11 @@ // // // NORM-LABEL: define {{[^@]+}}@.omp_outlined..7 -// NORM-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED:%.*]], %struct.Point** nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// NORM-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // NORM-NEXT: entry: // NORM-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // NORM-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// NORM-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// NORM-NEXT: [[RED_ADDR:%.*]] = alloca %struct.Point*, align 8 -// NORM-NEXT: [[POINTS_ADDR:%.*]] = alloca %struct.Point**, align 8 +// NORM-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // NORM-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // NORM-NEXT: [[TMP:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -742,111 +795,113 @@ // NORM-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// NORM-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// NORM-NEXT: [[I4:%.*]] = alloca i32, align 4 +// NORM-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// NORM-NEXT: [[I3:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // NORM-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4 -// NORM-NEXT: [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4 +// NORM-NEXT: [[REF_TMP9:%.*]] = alloca [[STRUCT_POINT]], align 4 // NORM-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// NORM-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// NORM-NEXT: store %struct.Point* [[RED]], %struct.Point** [[RED_ADDR]], align 8 -// NORM-NEXT: store %struct.Point** [[POINTS]], %struct.Point*** [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// NORM-NEXT: [[TMP1:%.*]] = load %struct.Point*, %struct.Point** [[RED_ADDR]], align 8 -// NORM-NEXT: [[TMP2:%.*]] = load %struct.Point**, %struct.Point*** [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// NORM-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// NORM-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// NORM-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// NORM-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// NORM-NEXT: [[TMP4:%.*]] = load %struct.Point*, %struct.Point** [[TMP3]], align 8 +// NORM-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// NORM-NEXT: [[TMP6:%.*]] = load %struct.Point**, %struct.Point*** [[TMP5]], align 8 +// NORM-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// NORM-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // NORM-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // NORM-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // NORM-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: store i32 0, i32* [[I]], align 4 -// NORM-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// NORM-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // NORM-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // NORM: omp.precond.then: // NORM-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// NORM-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // NORM-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // NORM-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// NORM-NEXT: call void @_ZN5PointC1Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// NORM-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// NORM-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// NORM-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// NORM-NEXT: call void @_ZN5PointC1Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// NORM-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// NORM-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// NORM-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// NORM-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // NORM: cond.true: -// NORM-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: br label [[COND_END:%.*]] // NORM: cond.false: -// NORM-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // NORM-NEXT: br label [[COND_END]] // NORM: cond.end: -// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // NORM-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// NORM-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// NORM-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // NORM: omp.inner.for.cond: -// NORM-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// NORM-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// NORM-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// NORM-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// NORM-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// NORM-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // NORM: omp.inner.for.body: -// NORM-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// NORM-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// NORM-NEXT: store i32 [[ADD7]], i32* [[I4]], align 4 -// NORM-NEXT: [[TMP17:%.*]] = load i32, i32* [[I4]], align 4 -// NORM-NEXT: [[TMP18:%.*]] = load %struct.Point*, %struct.Point** [[TMP2]], align 8 -// NORM-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], %struct.Point* [[TMP18]]) +// NORM-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// NORM-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// NORM-NEXT: store i32 [[ADD6]], i32* [[I3]], align 4 +// NORM-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// NORM-NEXT: [[TMP22:%.*]] = load %struct.Point*, %struct.Point** [[TMP6]], align 8 +// NORM-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], %struct.Point* [[TMP22]]) // NORM-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // NORM: omp.body.continue: // NORM-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // NORM: omp.inner.for.inc: -// NORM-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// NORM-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// NORM-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND]] // NORM: omp.inner.for.end: // NORM-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // NORM: omp.loop.exit: -// NORM-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// NORM-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// NORM-NEXT: [[TMP23:%.*]] = bitcast %struct.Point* [[RED3]] to i8* -// NORM-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 // NORM-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// NORM-NEXT: [[TMP26:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// NORM-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, i8* [[TMP26]], void (i8*, i8*)* @.omp.reduction.reduction_func.8, [8 x i32]* @.gomp_critical_user_.reduction.var) -// NORM-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// NORM-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) +// NORM-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// NORM-NEXT: [[TMP27:%.*]] = bitcast %struct.Point* [[RED]] to i8* +// NORM-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 +// NORM-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// NORM-NEXT: [[TMP30:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// NORM-NEXT: [[TMP31:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]], i32 1, i64 8, i8* [[TMP30]], void (i8*, i8*)* @.omp.reduction.reduction_func.8, [8 x i32]* @.gomp_critical_user_.reduction.var) +// NORM-NEXT: switch i32 [[TMP31]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // NORM-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // NORM-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // NORM-NEXT: ] // NORM: .omp.reduction.case1: -// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointorERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) -// NORM-NEXT: [[TMP28:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64* -// NORM-NEXT: store i64 [[CALL]], i64* [[TMP28]], align 4 -// NORM-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]]) -// NORM-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointorERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) +// NORM-NEXT: [[TMP32:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64* +// NORM-NEXT: store i64 [[CALL]], i64* [[TMP32]], align 4 +// NORM-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// NORM-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.case2: -// NORM-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// NORM-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// NORM-NEXT: [[CALL11:%.*]] = call i64 @_ZNK5PointorERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) -// NORM-NEXT: [[TMP31:%.*]] = bitcast %struct.Point* [[REF_TMP10]] to i64* -// NORM-NEXT: store i64 [[CALL11]], i64* [[TMP31]], align 4 -// NORM-NEXT: [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP10]]) -// NORM-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// NORM-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP34]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[CALL10:%.*]] = call i64 @_ZNK5PointorERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) +// NORM-NEXT: [[TMP35:%.*]] = bitcast %struct.Point* [[REF_TMP9]] to i64* +// NORM-NEXT: store i64 [[CALL10]], i64* [[TMP35]], align 4 +// NORM-NEXT: [[CALL11:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP9]]) +// NORM-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP34]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.default: // NORM-NEXT: br label [[OMP_PRECOND_END]] @@ -880,13 +935,11 @@ // // // NORM-LABEL: define {{[^@]+}}@.omp_outlined..9 -// NORM-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED:%.*]], %struct.Point** nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// NORM-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // NORM-NEXT: entry: // NORM-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // NORM-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// NORM-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// NORM-NEXT: [[RED_ADDR:%.*]] = alloca %struct.Point*, align 8 -// NORM-NEXT: [[POINTS_ADDR:%.*]] = alloca %struct.Point**, align 8 +// NORM-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // NORM-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // NORM-NEXT: [[TMP:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -896,111 +949,113 @@ // NORM-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// NORM-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// NORM-NEXT: [[I4:%.*]] = alloca i32, align 4 +// NORM-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// NORM-NEXT: [[I3:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // NORM-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4 -// NORM-NEXT: [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4 +// NORM-NEXT: [[REF_TMP9:%.*]] = alloca [[STRUCT_POINT]], align 4 // NORM-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// NORM-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// NORM-NEXT: store %struct.Point* [[RED]], %struct.Point** [[RED_ADDR]], align 8 -// NORM-NEXT: store %struct.Point** [[POINTS]], %struct.Point*** [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// NORM-NEXT: [[TMP1:%.*]] = load %struct.Point*, %struct.Point** [[RED_ADDR]], align 8 -// NORM-NEXT: [[TMP2:%.*]] = load %struct.Point**, %struct.Point*** [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// NORM-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// NORM-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// NORM-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// NORM-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// NORM-NEXT: [[TMP4:%.*]] = load %struct.Point*, %struct.Point** [[TMP3]], align 8 +// NORM-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// NORM-NEXT: [[TMP6:%.*]] = load %struct.Point**, %struct.Point*** [[TMP5]], align 8 +// NORM-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// NORM-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // NORM-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // NORM-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // NORM-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: store i32 0, i32* [[I]], align 4 -// NORM-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// NORM-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // NORM-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // NORM: omp.precond.then: // NORM-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// NORM-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // NORM-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // NORM-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// NORM-NEXT: call void @_ZN5PointC1Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// NORM-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// NORM-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// NORM-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// NORM-NEXT: call void @_ZN5PointC1Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// NORM-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// NORM-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// NORM-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// NORM-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // NORM: cond.true: -// NORM-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: br label [[COND_END:%.*]] // NORM: cond.false: -// NORM-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // NORM-NEXT: br label [[COND_END]] // NORM: cond.end: -// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // NORM-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// NORM-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// NORM-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // NORM: omp.inner.for.cond: -// NORM-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// NORM-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// NORM-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// NORM-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// NORM-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// NORM-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // NORM: omp.inner.for.body: -// NORM-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// NORM-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// NORM-NEXT: store i32 [[ADD7]], i32* [[I4]], align 4 -// NORM-NEXT: [[TMP17:%.*]] = load i32, i32* [[I4]], align 4 -// NORM-NEXT: [[TMP18:%.*]] = load %struct.Point*, %struct.Point** [[TMP2]], align 8 -// NORM-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], %struct.Point* [[TMP18]]) +// NORM-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// NORM-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// NORM-NEXT: store i32 [[ADD6]], i32* [[I3]], align 4 +// NORM-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// NORM-NEXT: [[TMP22:%.*]] = load %struct.Point*, %struct.Point** [[TMP6]], align 8 +// NORM-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], %struct.Point* [[TMP22]]) // NORM-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // NORM: omp.body.continue: // NORM-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // NORM: omp.inner.for.inc: -// NORM-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// NORM-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// NORM-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND]] // NORM: omp.inner.for.end: // NORM-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // NORM: omp.loop.exit: -// NORM-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// NORM-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// NORM-NEXT: [[TMP23:%.*]] = bitcast %struct.Point* [[RED3]] to i8* -// NORM-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 // NORM-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// NORM-NEXT: [[TMP26:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// NORM-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, i8* [[TMP26]], void (i8*, i8*)* @.omp.reduction.reduction_func.10, [8 x i32]* @.gomp_critical_user_.reduction.var) -// NORM-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// NORM-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) +// NORM-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// NORM-NEXT: [[TMP27:%.*]] = bitcast %struct.Point* [[RED]] to i8* +// NORM-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 +// NORM-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// NORM-NEXT: [[TMP30:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// NORM-NEXT: [[TMP31:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]], i32 1, i64 8, i8* [[TMP30]], void (i8*, i8*)* @.omp.reduction.reduction_func.10, [8 x i32]* @.gomp_critical_user_.reduction.var) +// NORM-NEXT: switch i32 [[TMP31]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // NORM-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // NORM-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // NORM-NEXT: ] // NORM: .omp.reduction.case1: -// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointeoERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) -// NORM-NEXT: [[TMP28:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64* -// NORM-NEXT: store i64 [[CALL]], i64* [[TMP28]], align 4 -// NORM-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]]) -// NORM-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointeoERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) +// NORM-NEXT: [[TMP32:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64* +// NORM-NEXT: store i64 [[CALL]], i64* [[TMP32]], align 4 +// NORM-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// NORM-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.case2: -// NORM-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// NORM-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// NORM-NEXT: [[CALL11:%.*]] = call i64 @_ZNK5PointeoERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) -// NORM-NEXT: [[TMP31:%.*]] = bitcast %struct.Point* [[REF_TMP10]] to i64* -// NORM-NEXT: store i64 [[CALL11]], i64* [[TMP31]], align 4 -// NORM-NEXT: [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP10]]) -// NORM-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// NORM-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP34]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[CALL10:%.*]] = call i64 @_ZNK5PointeoERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) +// NORM-NEXT: [[TMP35:%.*]] = bitcast %struct.Point* [[REF_TMP9]] to i64* +// NORM-NEXT: store i64 [[CALL10]], i64* [[TMP35]], align 4 +// NORM-NEXT: [[CALL11:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP9]]) +// NORM-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP34]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.default: // NORM-NEXT: br label [[OMP_PRECOND_END]] @@ -1034,13 +1089,11 @@ // // // NORM-LABEL: define {{[^@]+}}@.omp_outlined..11 -// NORM-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED:%.*]], %struct.Point** nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// NORM-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // NORM-NEXT: entry: // NORM-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // NORM-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// NORM-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// NORM-NEXT: [[RED_ADDR:%.*]] = alloca %struct.Point*, align 8 -// NORM-NEXT: [[POINTS_ADDR:%.*]] = alloca %struct.Point**, align 8 +// NORM-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // NORM-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // NORM-NEXT: [[TMP:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1050,111 +1103,113 @@ // NORM-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// NORM-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// NORM-NEXT: [[I4:%.*]] = alloca i32, align 4 +// NORM-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// NORM-NEXT: [[I3:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // NORM-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4 -// NORM-NEXT: [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4 +// NORM-NEXT: [[REF_TMP9:%.*]] = alloca [[STRUCT_POINT]], align 4 // NORM-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// NORM-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// NORM-NEXT: store %struct.Point* [[RED]], %struct.Point** [[RED_ADDR]], align 8 -// NORM-NEXT: store %struct.Point** [[POINTS]], %struct.Point*** [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// NORM-NEXT: [[TMP1:%.*]] = load %struct.Point*, %struct.Point** [[RED_ADDR]], align 8 -// NORM-NEXT: [[TMP2:%.*]] = load %struct.Point**, %struct.Point*** [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// NORM-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// NORM-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// NORM-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// NORM-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// NORM-NEXT: [[TMP4:%.*]] = load %struct.Point*, %struct.Point** [[TMP3]], align 8 +// NORM-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// NORM-NEXT: [[TMP6:%.*]] = load %struct.Point**, %struct.Point*** [[TMP5]], align 8 +// NORM-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// NORM-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // NORM-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // NORM-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // NORM-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: store i32 0, i32* [[I]], align 4 -// NORM-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// NORM-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // NORM-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // NORM: omp.precond.then: // NORM-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// NORM-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // NORM-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // NORM-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// NORM-NEXT: call void @_ZN5PointC1Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// NORM-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// NORM-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// NORM-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// NORM-NEXT: call void @_ZN5PointC1Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// NORM-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// NORM-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// NORM-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// NORM-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // NORM: cond.true: -// NORM-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: br label [[COND_END:%.*]] // NORM: cond.false: -// NORM-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // NORM-NEXT: br label [[COND_END]] // NORM: cond.end: -// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // NORM-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// NORM-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// NORM-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // NORM: omp.inner.for.cond: -// NORM-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// NORM-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// NORM-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// NORM-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// NORM-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// NORM-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // NORM: omp.inner.for.body: -// NORM-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// NORM-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// NORM-NEXT: store i32 [[ADD7]], i32* [[I4]], align 4 -// NORM-NEXT: [[TMP17:%.*]] = load i32, i32* [[I4]], align 4 -// NORM-NEXT: [[TMP18:%.*]] = load %struct.Point*, %struct.Point** [[TMP2]], align 8 -// NORM-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], %struct.Point* [[TMP18]]) +// NORM-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// NORM-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// NORM-NEXT: store i32 [[ADD6]], i32* [[I3]], align 4 +// NORM-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// NORM-NEXT: [[TMP22:%.*]] = load %struct.Point*, %struct.Point** [[TMP6]], align 8 +// NORM-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], %struct.Point* [[TMP22]]) // NORM-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // NORM: omp.body.continue: // NORM-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // NORM: omp.inner.for.inc: -// NORM-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// NORM-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// NORM-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND]] // NORM: omp.inner.for.end: // NORM-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // NORM: omp.loop.exit: -// NORM-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// NORM-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// NORM-NEXT: [[TMP23:%.*]] = bitcast %struct.Point* [[RED3]] to i8* -// NORM-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 // NORM-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// NORM-NEXT: [[TMP26:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// NORM-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, i8* [[TMP26]], void (i8*, i8*)* @.omp.reduction.reduction_func.12, [8 x i32]* @.gomp_critical_user_.reduction.var) -// NORM-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// NORM-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) +// NORM-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// NORM-NEXT: [[TMP27:%.*]] = bitcast %struct.Point* [[RED]] to i8* +// NORM-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 +// NORM-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// NORM-NEXT: [[TMP30:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// NORM-NEXT: [[TMP31:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]], i32 1, i64 8, i8* [[TMP30]], void (i8*, i8*)* @.omp.reduction.reduction_func.12, [8 x i32]* @.gomp_critical_user_.reduction.var) +// NORM-NEXT: switch i32 [[TMP31]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // NORM-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // NORM-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // NORM-NEXT: ] // NORM: .omp.reduction.case1: -// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointaaERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) -// NORM-NEXT: [[TMP28:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64* -// NORM-NEXT: store i64 [[CALL]], i64* [[TMP28]], align 4 -// NORM-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]]) -// NORM-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointaaERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) +// NORM-NEXT: [[TMP32:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64* +// NORM-NEXT: store i64 [[CALL]], i64* [[TMP32]], align 4 +// NORM-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// NORM-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.case2: -// NORM-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// NORM-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// NORM-NEXT: [[CALL11:%.*]] = call i64 @_ZNK5PointaaERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) -// NORM-NEXT: [[TMP31:%.*]] = bitcast %struct.Point* [[REF_TMP10]] to i64* -// NORM-NEXT: store i64 [[CALL11]], i64* [[TMP31]], align 4 -// NORM-NEXT: [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP10]]) -// NORM-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// NORM-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP34]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[CALL10:%.*]] = call i64 @_ZNK5PointaaERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) +// NORM-NEXT: [[TMP35:%.*]] = bitcast %struct.Point* [[REF_TMP9]] to i64* +// NORM-NEXT: store i64 [[CALL10]], i64* [[TMP35]], align 4 +// NORM-NEXT: [[CALL11:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP9]]) +// NORM-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP34]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.default: // NORM-NEXT: br label [[OMP_PRECOND_END]] @@ -1188,13 +1243,11 @@ // // // NORM-LABEL: define {{[^@]+}}@.omp_outlined..13 -// NORM-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED:%.*]], %struct.Point** nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// NORM-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // NORM-NEXT: entry: // NORM-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // NORM-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// NORM-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// NORM-NEXT: [[RED_ADDR:%.*]] = alloca %struct.Point*, align 8 -// NORM-NEXT: [[POINTS_ADDR:%.*]] = alloca %struct.Point**, align 8 +// NORM-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // NORM-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // NORM-NEXT: [[TMP:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1204,111 +1257,113 @@ // NORM-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// NORM-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// NORM-NEXT: [[I4:%.*]] = alloca i32, align 4 +// NORM-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// NORM-NEXT: [[I3:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // NORM-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4 -// NORM-NEXT: [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4 +// NORM-NEXT: [[REF_TMP9:%.*]] = alloca [[STRUCT_POINT]], align 4 // NORM-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// NORM-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// NORM-NEXT: store %struct.Point* [[RED]], %struct.Point** [[RED_ADDR]], align 8 -// NORM-NEXT: store %struct.Point** [[POINTS]], %struct.Point*** [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// NORM-NEXT: [[TMP1:%.*]] = load %struct.Point*, %struct.Point** [[RED_ADDR]], align 8 -// NORM-NEXT: [[TMP2:%.*]] = load %struct.Point**, %struct.Point*** [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// NORM-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// NORM-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// NORM-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// NORM-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// NORM-NEXT: [[TMP4:%.*]] = load %struct.Point*, %struct.Point** [[TMP3]], align 8 +// NORM-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// NORM-NEXT: [[TMP6:%.*]] = load %struct.Point**, %struct.Point*** [[TMP5]], align 8 +// NORM-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// NORM-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // NORM-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // NORM-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // NORM-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: store i32 0, i32* [[I]], align 4 -// NORM-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// NORM-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // NORM-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // NORM: omp.precond.then: // NORM-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// NORM-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // NORM-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // NORM-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// NORM-NEXT: call void @_ZN5PointC1Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// NORM-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// NORM-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// NORM-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// NORM-NEXT: call void @_ZN5PointC1Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// NORM-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// NORM-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// NORM-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// NORM-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // NORM: cond.true: -// NORM-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: br label [[COND_END:%.*]] // NORM: cond.false: -// NORM-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // NORM-NEXT: br label [[COND_END]] // NORM: cond.end: -// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // NORM-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// NORM-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// NORM-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // NORM: omp.inner.for.cond: -// NORM-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// NORM-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// NORM-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// NORM-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// NORM-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// NORM-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // NORM: omp.inner.for.body: -// NORM-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// NORM-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// NORM-NEXT: store i32 [[ADD7]], i32* [[I4]], align 4 -// NORM-NEXT: [[TMP17:%.*]] = load i32, i32* [[I4]], align 4 -// NORM-NEXT: [[TMP18:%.*]] = load %struct.Point*, %struct.Point** [[TMP2]], align 8 -// NORM-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], %struct.Point* [[TMP18]]) +// NORM-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// NORM-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// NORM-NEXT: store i32 [[ADD6]], i32* [[I3]], align 4 +// NORM-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// NORM-NEXT: [[TMP22:%.*]] = load %struct.Point*, %struct.Point** [[TMP6]], align 8 +// NORM-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], %struct.Point* [[TMP22]]) // NORM-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // NORM: omp.body.continue: // NORM-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // NORM: omp.inner.for.inc: -// NORM-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// NORM-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// NORM-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND]] // NORM: omp.inner.for.end: // NORM-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // NORM: omp.loop.exit: -// NORM-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// NORM-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// NORM-NEXT: [[TMP23:%.*]] = bitcast %struct.Point* [[RED3]] to i8* -// NORM-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 // NORM-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// NORM-NEXT: [[TMP26:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// NORM-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, i8* [[TMP26]], void (i8*, i8*)* @.omp.reduction.reduction_func.14, [8 x i32]* @.gomp_critical_user_.reduction.var) -// NORM-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// NORM-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) +// NORM-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// NORM-NEXT: [[TMP27:%.*]] = bitcast %struct.Point* [[RED]] to i8* +// NORM-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 +// NORM-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// NORM-NEXT: [[TMP30:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// NORM-NEXT: [[TMP31:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]], i32 1, i64 8, i8* [[TMP30]], void (i8*, i8*)* @.omp.reduction.reduction_func.14, [8 x i32]* @.gomp_critical_user_.reduction.var) +// NORM-NEXT: switch i32 [[TMP31]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // NORM-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // NORM-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // NORM-NEXT: ] // NORM: .omp.reduction.case1: -// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointooERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) -// NORM-NEXT: [[TMP28:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64* -// NORM-NEXT: store i64 [[CALL]], i64* [[TMP28]], align 4 -// NORM-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]]) -// NORM-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointooERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) +// NORM-NEXT: [[TMP32:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64* +// NORM-NEXT: store i64 [[CALL]], i64* [[TMP32]], align 4 +// NORM-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// NORM-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.case2: -// NORM-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// NORM-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// NORM-NEXT: [[CALL11:%.*]] = call i64 @_ZNK5PointooERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) -// NORM-NEXT: [[TMP31:%.*]] = bitcast %struct.Point* [[REF_TMP10]] to i64* -// NORM-NEXT: store i64 [[CALL11]], i64* [[TMP31]], align 4 -// NORM-NEXT: [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP10]]) -// NORM-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// NORM-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP34]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[CALL10:%.*]] = call i64 @_ZNK5PointooERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) +// NORM-NEXT: [[TMP35:%.*]] = bitcast %struct.Point* [[REF_TMP9]] to i64* +// NORM-NEXT: store i64 [[CALL10]], i64* [[TMP35]], align 4 +// NORM-NEXT: [[CALL11:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP9]]) +// NORM-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP34]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.default: // NORM-NEXT: br label [[OMP_PRECOND_END]] @@ -1360,17 +1415,73 @@ // COMP-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // COMP-NEXT: [[POINTS_ADDR:%.*]] = alloca %struct.Point*, align 8 // COMP-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// COMP-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// COMP-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// COMP-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// COMP-NEXT: [[OMP_OUTLINED_ARG_AGG_3:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// COMP-NEXT: [[OMP_OUTLINED_ARG_AGG_4:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// COMP-NEXT: [[OMP_OUTLINED_ARG_AGG_5:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 +// COMP-NEXT: [[OMP_OUTLINED_ARG_AGG_6:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 +// COMP-NEXT: [[OMP_OUTLINED_ARG_AGG_7:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // COMP-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // COMP-NEXT: store %struct.Point* [[POINTS]], %struct.Point** [[POINTS_ADDR]], align 8 // COMP-NEXT: call void @_ZN5PointC1Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4:[0-9]+]] -// COMP-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.Point*, %struct.Point**)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[N_ADDR]], %struct.Point* [[RED]], %struct.Point** [[POINTS_ADDR]]) -// COMP-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.Point*, %struct.Point**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], %struct.Point* [[RED]], %struct.Point** [[POINTS_ADDR]]) -// COMP-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.Point*, %struct.Point**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], %struct.Point* [[RED]], %struct.Point** [[POINTS_ADDR]]) -// COMP-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.Point*, %struct.Point**)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], %struct.Point* [[RED]], %struct.Point** [[POINTS_ADDR]]) -// COMP-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.Point*, %struct.Point**)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], %struct.Point* [[RED]], %struct.Point** [[POINTS_ADDR]]) -// COMP-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.Point*, %struct.Point**)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], %struct.Point* [[RED]], %struct.Point** [[POINTS_ADDR]]) -// COMP-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.Point*, %struct.Point**)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], %struct.Point* [[RED]], %struct.Point** [[POINTS_ADDR]]) -// COMP-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.Point*, %struct.Point**)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], %struct.Point* [[RED]], %struct.Point** [[POINTS_ADDR]]) +// COMP-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// COMP-NEXT: store i32* [[N_ADDR]], i32** [[TMP0]], align 8 +// COMP-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// COMP-NEXT: store %struct.Point* [[RED]], %struct.Point** [[TMP1]], align 8 +// COMP-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// COMP-NEXT: store %struct.Point** [[POINTS_ADDR]], %struct.Point*** [[TMP2]], align 8 +// COMP-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) +// COMP-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0 +// COMP-NEXT: store i32* [[N_ADDR]], i32** [[TMP3]], align 8 +// COMP-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 1 +// COMP-NEXT: store %struct.Point* [[RED]], %struct.Point** [[TMP4]], align 8 +// COMP-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 2 +// COMP-NEXT: store %struct.Point** [[POINTS_ADDR]], %struct.Point*** [[TMP5]], align 8 +// COMP-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]]) +// COMP-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 0 +// COMP-NEXT: store i32* [[N_ADDR]], i32** [[TMP6]], align 8 +// COMP-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 1 +// COMP-NEXT: store %struct.Point* [[RED]], %struct.Point** [[TMP7]], align 8 +// COMP-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 2 +// COMP-NEXT: store %struct.Point** [[POINTS_ADDR]], %struct.Point*** [[TMP8]], align 8 +// COMP-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_2]]) +// COMP-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_3]], i32 0, i32 0 +// COMP-NEXT: store i32* [[N_ADDR]], i32** [[TMP9]], align 8 +// COMP-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_3]], i32 0, i32 1 +// COMP-NEXT: store %struct.Point* [[RED]], %struct.Point** [[TMP10]], align 8 +// COMP-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_3]], i32 0, i32 2 +// COMP-NEXT: store %struct.Point** [[POINTS_ADDR]], %struct.Point*** [[TMP11]], align 8 +// COMP-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_3]]) +// COMP-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_4]], i32 0, i32 0 +// COMP-NEXT: store i32* [[N_ADDR]], i32** [[TMP12]], align 8 +// COMP-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_4]], i32 0, i32 1 +// COMP-NEXT: store %struct.Point* [[RED]], %struct.Point** [[TMP13]], align 8 +// COMP-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_4]], i32 0, i32 2 +// COMP-NEXT: store %struct.Point** [[POINTS_ADDR]], %struct.Point*** [[TMP14]], align 8 +// COMP-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_4]]) +// COMP-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 0 +// COMP-NEXT: store i32* [[N_ADDR]], i32** [[TMP15]], align 8 +// COMP-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 1 +// COMP-NEXT: store %struct.Point* [[RED]], %struct.Point** [[TMP16]], align 8 +// COMP-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 2 +// COMP-NEXT: store %struct.Point** [[POINTS_ADDR]], %struct.Point*** [[TMP17]], align 8 +// COMP-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]]) +// COMP-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_6]], i32 0, i32 0 +// COMP-NEXT: store i32* [[N_ADDR]], i32** [[TMP18]], align 8 +// COMP-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_6]], i32 0, i32 1 +// COMP-NEXT: store %struct.Point* [[RED]], %struct.Point** [[TMP19]], align 8 +// COMP-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_6]], i32 0, i32 2 +// COMP-NEXT: store %struct.Point** [[POINTS_ADDR]], %struct.Point*** [[TMP20]], align 8 +// COMP-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_6]]) +// COMP-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_7]], i32 0, i32 0 +// COMP-NEXT: store i32* [[N_ADDR]], i32** [[TMP21]], align 8 +// COMP-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_7]], i32 0, i32 1 +// COMP-NEXT: store %struct.Point* [[RED]], %struct.Point** [[TMP22]], align 8 +// COMP-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_7]], i32 0, i32 2 +// COMP-NEXT: store %struct.Point** [[POINTS_ADDR]], %struct.Point*** [[TMP23]], align 8 +// COMP-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_7]]) // COMP-NEXT: ret void // // @@ -1385,13 +1496,11 @@ // // // COMP-LABEL: define {{[^@]+}}@.omp_outlined. -// COMP-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED:%.*]], %struct.Point** nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2:[0-9]+]] { +// COMP-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // COMP-NEXT: entry: // COMP-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // COMP-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// COMP-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// COMP-NEXT: [[RED_ADDR:%.*]] = alloca %struct.Point*, align 8 -// COMP-NEXT: [[POINTS_ADDR:%.*]] = alloca %struct.Point**, align 8 +// COMP-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // COMP-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // COMP-NEXT: [[TMP:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1401,103 +1510,105 @@ // COMP-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// COMP-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// COMP-NEXT: [[I4:%.*]] = alloca i32, align 4 +// COMP-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// COMP-NEXT: [[I3:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // COMP-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// COMP-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// COMP-NEXT: store %struct.Point* [[RED]], %struct.Point** [[RED_ADDR]], align 8 -// COMP-NEXT: store %struct.Point** [[POINTS]], %struct.Point*** [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// COMP-NEXT: [[TMP1:%.*]] = load %struct.Point*, %struct.Point** [[RED_ADDR]], align 8 -// COMP-NEXT: [[TMP2:%.*]] = load %struct.Point**, %struct.Point*** [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// COMP-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// COMP-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// COMP-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// COMP-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// COMP-NEXT: [[TMP4:%.*]] = load %struct.Point*, %struct.Point** [[TMP3]], align 8 +// COMP-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// COMP-NEXT: [[TMP6:%.*]] = load %struct.Point**, %struct.Point*** [[TMP5]], align 8 +// COMP-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// COMP-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // COMP-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // COMP-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // COMP-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: store i32 0, i32* [[I]], align 4 -// COMP-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// COMP-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // COMP-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // COMP: omp.precond.then: // COMP-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// COMP-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // COMP-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // COMP-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// COMP-NEXT: call void @_ZN5PointC1Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// COMP-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// COMP-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// COMP-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// COMP-NEXT: call void @_ZN5PointC1Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// COMP-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// COMP-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// COMP-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// COMP-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // COMP: cond.true: -// COMP-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: br label [[COND_END:%.*]] // COMP: cond.false: -// COMP-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // COMP-NEXT: br label [[COND_END]] // COMP: cond.end: -// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // COMP-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// COMP-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// COMP-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // COMP: omp.inner.for.cond: -// COMP-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// COMP-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// COMP-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// COMP-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// COMP-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// COMP-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // COMP: omp.inner.for.body: -// COMP-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// COMP-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// COMP-NEXT: store i32 [[ADD7]], i32* [[I4]], align 4 -// COMP-NEXT: [[TMP17:%.*]] = load i32, i32* [[I4]], align 4 -// COMP-NEXT: [[TMP18:%.*]] = load %struct.Point*, %struct.Point** [[TMP2]], align 8 -// COMP-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], %struct.Point* [[TMP18]]) +// COMP-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// COMP-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// COMP-NEXT: store i32 [[ADD6]], i32* [[I3]], align 4 +// COMP-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// COMP-NEXT: [[TMP22:%.*]] = load %struct.Point*, %struct.Point** [[TMP6]], align 8 +// COMP-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], %struct.Point* [[TMP22]]) // COMP-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // COMP: omp.body.continue: // COMP-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // COMP: omp.inner.for.inc: -// COMP-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// COMP-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// COMP-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND]] // COMP: omp.inner.for.end: // COMP-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // COMP: omp.loop.exit: -// COMP-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// COMP-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// COMP-NEXT: [[TMP23:%.*]] = bitcast %struct.Point* [[RED3]] to i8* -// COMP-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 // COMP-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// COMP-NEXT: [[TMP26:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// COMP-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 1, i64 8, i8* [[TMP26]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// COMP-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// COMP-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) +// COMP-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// COMP-NEXT: [[TMP27:%.*]] = bitcast %struct.Point* [[RED]] to i8* +// COMP-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 +// COMP-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// COMP-NEXT: [[TMP30:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// COMP-NEXT: [[TMP31:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP29]], i32 1, i64 8, i8* [[TMP30]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// COMP-NEXT: switch i32 [[TMP31]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // COMP-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // COMP-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // COMP-NEXT: ] // COMP: .omp.reduction.case1: -// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointpLERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointpLERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.case2: -// COMP-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 -// COMP-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// COMP-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointpLERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// COMP-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP33]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointpLERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP33]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.default: // COMP-NEXT: br label [[OMP_PRECOND_END]] @@ -1527,13 +1638,11 @@ // // // COMP-LABEL: define {{[^@]+}}@.omp_outlined..1 -// COMP-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED:%.*]], %struct.Point** nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// COMP-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // COMP-NEXT: entry: // COMP-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // COMP-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// COMP-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// COMP-NEXT: [[RED_ADDR:%.*]] = alloca %struct.Point*, align 8 -// COMP-NEXT: [[POINTS_ADDR:%.*]] = alloca %struct.Point**, align 8 +// COMP-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // COMP-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // COMP-NEXT: [[TMP:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1543,103 +1652,105 @@ // COMP-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// COMP-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// COMP-NEXT: [[I4:%.*]] = alloca i32, align 4 +// COMP-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// COMP-NEXT: [[I3:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // COMP-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// COMP-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// COMP-NEXT: store %struct.Point* [[RED]], %struct.Point** [[RED_ADDR]], align 8 -// COMP-NEXT: store %struct.Point** [[POINTS]], %struct.Point*** [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// COMP-NEXT: [[TMP1:%.*]] = load %struct.Point*, %struct.Point** [[RED_ADDR]], align 8 -// COMP-NEXT: [[TMP2:%.*]] = load %struct.Point**, %struct.Point*** [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// COMP-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// COMP-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// COMP-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// COMP-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// COMP-NEXT: [[TMP4:%.*]] = load %struct.Point*, %struct.Point** [[TMP3]], align 8 +// COMP-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// COMP-NEXT: [[TMP6:%.*]] = load %struct.Point**, %struct.Point*** [[TMP5]], align 8 +// COMP-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// COMP-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // COMP-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // COMP-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // COMP-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: store i32 0, i32* [[I]], align 4 -// COMP-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// COMP-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // COMP-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // COMP: omp.precond.then: // COMP-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// COMP-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // COMP-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // COMP-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// COMP-NEXT: call void @_ZN5PointC1Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// COMP-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// COMP-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// COMP-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// COMP-NEXT: call void @_ZN5PointC1Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// COMP-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// COMP-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// COMP-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// COMP-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // COMP: cond.true: -// COMP-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: br label [[COND_END:%.*]] // COMP: cond.false: -// COMP-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // COMP-NEXT: br label [[COND_END]] // COMP: cond.end: -// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // COMP-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// COMP-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// COMP-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // COMP: omp.inner.for.cond: -// COMP-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// COMP-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// COMP-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// COMP-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// COMP-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// COMP-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // COMP: omp.inner.for.body: -// COMP-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// COMP-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// COMP-NEXT: store i32 [[ADD7]], i32* [[I4]], align 4 -// COMP-NEXT: [[TMP17:%.*]] = load i32, i32* [[I4]], align 4 -// COMP-NEXT: [[TMP18:%.*]] = load %struct.Point*, %struct.Point** [[TMP2]], align 8 -// COMP-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], %struct.Point* [[TMP18]]) +// COMP-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// COMP-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// COMP-NEXT: store i32 [[ADD6]], i32* [[I3]], align 4 +// COMP-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// COMP-NEXT: [[TMP22:%.*]] = load %struct.Point*, %struct.Point** [[TMP6]], align 8 +// COMP-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], %struct.Point* [[TMP22]]) // COMP-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // COMP: omp.body.continue: // COMP-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // COMP: omp.inner.for.inc: -// COMP-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// COMP-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// COMP-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND]] // COMP: omp.inner.for.end: // COMP-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // COMP: omp.loop.exit: -// COMP-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// COMP-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// COMP-NEXT: [[TMP23:%.*]] = bitcast %struct.Point* [[RED3]] to i8* -// COMP-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 // COMP-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// COMP-NEXT: [[TMP26:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// COMP-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, i8* [[TMP26]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) -// COMP-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// COMP-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) +// COMP-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// COMP-NEXT: [[TMP27:%.*]] = bitcast %struct.Point* [[RED]] to i8* +// COMP-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 +// COMP-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// COMP-NEXT: [[TMP30:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// COMP-NEXT: [[TMP31:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]], i32 1, i64 8, i8* [[TMP30]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) +// COMP-NEXT: switch i32 [[TMP31]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // COMP-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // COMP-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // COMP-NEXT: ] // COMP: .omp.reduction.case1: -// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointpLERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointpLERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.case2: -// COMP-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 -// COMP-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// COMP-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointpLERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// COMP-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP33]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointpLERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP33]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.default: // COMP-NEXT: br label [[OMP_PRECOND_END]] @@ -1669,13 +1780,11 @@ // // // COMP-LABEL: define {{[^@]+}}@.omp_outlined..3 -// COMP-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED:%.*]], %struct.Point** nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// COMP-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // COMP-NEXT: entry: // COMP-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // COMP-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// COMP-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// COMP-NEXT: [[RED_ADDR:%.*]] = alloca %struct.Point*, align 8 -// COMP-NEXT: [[POINTS_ADDR:%.*]] = alloca %struct.Point**, align 8 +// COMP-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // COMP-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // COMP-NEXT: [[TMP:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1685,103 +1794,105 @@ // COMP-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// COMP-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// COMP-NEXT: [[I4:%.*]] = alloca i32, align 4 +// COMP-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// COMP-NEXT: [[I3:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // COMP-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// COMP-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// COMP-NEXT: store %struct.Point* [[RED]], %struct.Point** [[RED_ADDR]], align 8 -// COMP-NEXT: store %struct.Point** [[POINTS]], %struct.Point*** [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// COMP-NEXT: [[TMP1:%.*]] = load %struct.Point*, %struct.Point** [[RED_ADDR]], align 8 -// COMP-NEXT: [[TMP2:%.*]] = load %struct.Point**, %struct.Point*** [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// COMP-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// COMP-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// COMP-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// COMP-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// COMP-NEXT: [[TMP4:%.*]] = load %struct.Point*, %struct.Point** [[TMP3]], align 8 +// COMP-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// COMP-NEXT: [[TMP6:%.*]] = load %struct.Point**, %struct.Point*** [[TMP5]], align 8 +// COMP-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// COMP-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // COMP-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // COMP-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // COMP-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: store i32 0, i32* [[I]], align 4 -// COMP-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// COMP-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // COMP-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // COMP: omp.precond.then: // COMP-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// COMP-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // COMP-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // COMP-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// COMP-NEXT: call void @_ZN5PointC1Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// COMP-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// COMP-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// COMP-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// COMP-NEXT: call void @_ZN5PointC1Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// COMP-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// COMP-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// COMP-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// COMP-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // COMP: cond.true: -// COMP-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: br label [[COND_END:%.*]] // COMP: cond.false: -// COMP-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // COMP-NEXT: br label [[COND_END]] // COMP: cond.end: -// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // COMP-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// COMP-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// COMP-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // COMP: omp.inner.for.cond: -// COMP-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// COMP-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// COMP-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// COMP-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// COMP-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// COMP-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // COMP: omp.inner.for.body: -// COMP-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// COMP-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// COMP-NEXT: store i32 [[ADD7]], i32* [[I4]], align 4 -// COMP-NEXT: [[TMP17:%.*]] = load i32, i32* [[I4]], align 4 -// COMP-NEXT: [[TMP18:%.*]] = load %struct.Point*, %struct.Point** [[TMP2]], align 8 -// COMP-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], %struct.Point* [[TMP18]]) +// COMP-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// COMP-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// COMP-NEXT: store i32 [[ADD6]], i32* [[I3]], align 4 +// COMP-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// COMP-NEXT: [[TMP22:%.*]] = load %struct.Point*, %struct.Point** [[TMP6]], align 8 +// COMP-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], %struct.Point* [[TMP22]]) // COMP-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // COMP: omp.body.continue: // COMP-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // COMP: omp.inner.for.inc: -// COMP-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// COMP-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// COMP-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND]] // COMP: omp.inner.for.end: // COMP-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // COMP: omp.loop.exit: -// COMP-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// COMP-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// COMP-NEXT: [[TMP23:%.*]] = bitcast %struct.Point* [[RED3]] to i8* -// COMP-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 // COMP-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// COMP-NEXT: [[TMP26:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// COMP-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, i8* [[TMP26]], void (i8*, i8*)* @.omp.reduction.reduction_func.4, [8 x i32]* @.gomp_critical_user_.reduction.var) -// COMP-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// COMP-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) +// COMP-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// COMP-NEXT: [[TMP27:%.*]] = bitcast %struct.Point* [[RED]] to i8* +// COMP-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 +// COMP-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// COMP-NEXT: [[TMP30:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// COMP-NEXT: [[TMP31:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]], i32 1, i64 8, i8* [[TMP30]], void (i8*, i8*)* @.omp.reduction.reduction_func.4, [8 x i32]* @.gomp_critical_user_.reduction.var) +// COMP-NEXT: switch i32 [[TMP31]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // COMP-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // COMP-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // COMP-NEXT: ] // COMP: .omp.reduction.case1: -// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointmLERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointmLERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.case2: -// COMP-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 -// COMP-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// COMP-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointmLERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// COMP-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP33]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointmLERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP33]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.default: // COMP-NEXT: br label [[OMP_PRECOND_END]] @@ -1811,13 +1922,11 @@ // // // COMP-LABEL: define {{[^@]+}}@.omp_outlined..5 -// COMP-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED:%.*]], %struct.Point** nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// COMP-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // COMP-NEXT: entry: // COMP-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // COMP-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// COMP-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// COMP-NEXT: [[RED_ADDR:%.*]] = alloca %struct.Point*, align 8 -// COMP-NEXT: [[POINTS_ADDR:%.*]] = alloca %struct.Point**, align 8 +// COMP-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // COMP-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // COMP-NEXT: [[TMP:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1827,103 +1936,105 @@ // COMP-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// COMP-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// COMP-NEXT: [[I4:%.*]] = alloca i32, align 4 +// COMP-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// COMP-NEXT: [[I3:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // COMP-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// COMP-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// COMP-NEXT: store %struct.Point* [[RED]], %struct.Point** [[RED_ADDR]], align 8 -// COMP-NEXT: store %struct.Point** [[POINTS]], %struct.Point*** [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// COMP-NEXT: [[TMP1:%.*]] = load %struct.Point*, %struct.Point** [[RED_ADDR]], align 8 -// COMP-NEXT: [[TMP2:%.*]] = load %struct.Point**, %struct.Point*** [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// COMP-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// COMP-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// COMP-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// COMP-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// COMP-NEXT: [[TMP4:%.*]] = load %struct.Point*, %struct.Point** [[TMP3]], align 8 +// COMP-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// COMP-NEXT: [[TMP6:%.*]] = load %struct.Point**, %struct.Point*** [[TMP5]], align 8 +// COMP-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// COMP-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // COMP-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // COMP-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // COMP-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: store i32 0, i32* [[I]], align 4 -// COMP-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// COMP-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // COMP-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // COMP: omp.precond.then: // COMP-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// COMP-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // COMP-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // COMP-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// COMP-NEXT: call void @_ZN5PointC1Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// COMP-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// COMP-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// COMP-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// COMP-NEXT: call void @_ZN5PointC1Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// COMP-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// COMP-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// COMP-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// COMP-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // COMP: cond.true: -// COMP-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: br label [[COND_END:%.*]] // COMP: cond.false: -// COMP-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // COMP-NEXT: br label [[COND_END]] // COMP: cond.end: -// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // COMP-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// COMP-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// COMP-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // COMP: omp.inner.for.cond: -// COMP-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// COMP-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// COMP-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// COMP-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// COMP-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// COMP-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // COMP: omp.inner.for.body: -// COMP-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// COMP-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// COMP-NEXT: store i32 [[ADD7]], i32* [[I4]], align 4 -// COMP-NEXT: [[TMP17:%.*]] = load i32, i32* [[I4]], align 4 -// COMP-NEXT: [[TMP18:%.*]] = load %struct.Point*, %struct.Point** [[TMP2]], align 8 -// COMP-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], %struct.Point* [[TMP18]]) +// COMP-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// COMP-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// COMP-NEXT: store i32 [[ADD6]], i32* [[I3]], align 4 +// COMP-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// COMP-NEXT: [[TMP22:%.*]] = load %struct.Point*, %struct.Point** [[TMP6]], align 8 +// COMP-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], %struct.Point* [[TMP22]]) // COMP-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // COMP: omp.body.continue: // COMP-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // COMP: omp.inner.for.inc: -// COMP-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// COMP-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// COMP-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND]] // COMP: omp.inner.for.end: // COMP-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // COMP: omp.loop.exit: -// COMP-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// COMP-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// COMP-NEXT: [[TMP23:%.*]] = bitcast %struct.Point* [[RED3]] to i8* -// COMP-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 // COMP-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// COMP-NEXT: [[TMP26:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// COMP-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, i8* [[TMP26]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var) -// COMP-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// COMP-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) +// COMP-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// COMP-NEXT: [[TMP27:%.*]] = bitcast %struct.Point* [[RED]] to i8* +// COMP-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 +// COMP-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// COMP-NEXT: [[TMP30:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// COMP-NEXT: [[TMP31:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]], i32 1, i64 8, i8* [[TMP30]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var) +// COMP-NEXT: switch i32 [[TMP31]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // COMP-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // COMP-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // COMP-NEXT: ] // COMP: .omp.reduction.case1: -// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaNERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaNERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.case2: -// COMP-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 -// COMP-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// COMP-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaNERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// COMP-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP33]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaNERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP33]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.default: // COMP-NEXT: br label [[OMP_PRECOND_END]] @@ -1953,13 +2064,11 @@ // // // COMP-LABEL: define {{[^@]+}}@.omp_outlined..7 -// COMP-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED:%.*]], %struct.Point** nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// COMP-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // COMP-NEXT: entry: // COMP-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // COMP-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// COMP-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// COMP-NEXT: [[RED_ADDR:%.*]] = alloca %struct.Point*, align 8 -// COMP-NEXT: [[POINTS_ADDR:%.*]] = alloca %struct.Point**, align 8 +// COMP-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // COMP-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // COMP-NEXT: [[TMP:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1969,103 +2078,105 @@ // COMP-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// COMP-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// COMP-NEXT: [[I4:%.*]] = alloca i32, align 4 +// COMP-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// COMP-NEXT: [[I3:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // COMP-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// COMP-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// COMP-NEXT: store %struct.Point* [[RED]], %struct.Point** [[RED_ADDR]], align 8 -// COMP-NEXT: store %struct.Point** [[POINTS]], %struct.Point*** [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// COMP-NEXT: [[TMP1:%.*]] = load %struct.Point*, %struct.Point** [[RED_ADDR]], align 8 -// COMP-NEXT: [[TMP2:%.*]] = load %struct.Point**, %struct.Point*** [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// COMP-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// COMP-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// COMP-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// COMP-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// COMP-NEXT: [[TMP4:%.*]] = load %struct.Point*, %struct.Point** [[TMP3]], align 8 +// COMP-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// COMP-NEXT: [[TMP6:%.*]] = load %struct.Point**, %struct.Point*** [[TMP5]], align 8 +// COMP-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// COMP-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // COMP-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // COMP-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // COMP-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: store i32 0, i32* [[I]], align 4 -// COMP-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// COMP-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // COMP-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // COMP: omp.precond.then: // COMP-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// COMP-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // COMP-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // COMP-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// COMP-NEXT: call void @_ZN5PointC1Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// COMP-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// COMP-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// COMP-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// COMP-NEXT: call void @_ZN5PointC1Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// COMP-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// COMP-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// COMP-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// COMP-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // COMP: cond.true: -// COMP-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: br label [[COND_END:%.*]] // COMP: cond.false: -// COMP-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // COMP-NEXT: br label [[COND_END]] // COMP: cond.end: -// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // COMP-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// COMP-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// COMP-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // COMP: omp.inner.for.cond: -// COMP-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// COMP-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// COMP-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// COMP-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// COMP-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// COMP-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // COMP: omp.inner.for.body: -// COMP-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// COMP-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// COMP-NEXT: store i32 [[ADD7]], i32* [[I4]], align 4 -// COMP-NEXT: [[TMP17:%.*]] = load i32, i32* [[I4]], align 4 -// COMP-NEXT: [[TMP18:%.*]] = load %struct.Point*, %struct.Point** [[TMP2]], align 8 -// COMP-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], %struct.Point* [[TMP18]]) +// COMP-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// COMP-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// COMP-NEXT: store i32 [[ADD6]], i32* [[I3]], align 4 +// COMP-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// COMP-NEXT: [[TMP22:%.*]] = load %struct.Point*, %struct.Point** [[TMP6]], align 8 +// COMP-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], %struct.Point* [[TMP22]]) // COMP-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // COMP: omp.body.continue: // COMP-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // COMP: omp.inner.for.inc: -// COMP-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// COMP-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// COMP-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND]] // COMP: omp.inner.for.end: // COMP-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // COMP: omp.loop.exit: -// COMP-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// COMP-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// COMP-NEXT: [[TMP23:%.*]] = bitcast %struct.Point* [[RED3]] to i8* -// COMP-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 // COMP-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// COMP-NEXT: [[TMP26:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// COMP-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, i8* [[TMP26]], void (i8*, i8*)* @.omp.reduction.reduction_func.8, [8 x i32]* @.gomp_critical_user_.reduction.var) -// COMP-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// COMP-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) +// COMP-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// COMP-NEXT: [[TMP27:%.*]] = bitcast %struct.Point* [[RED]] to i8* +// COMP-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 +// COMP-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// COMP-NEXT: [[TMP30:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// COMP-NEXT: [[TMP31:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]], i32 1, i64 8, i8* [[TMP30]], void (i8*, i8*)* @.omp.reduction.reduction_func.8, [8 x i32]* @.gomp_critical_user_.reduction.var) +// COMP-NEXT: switch i32 [[TMP31]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // COMP-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // COMP-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // COMP-NEXT: ] // COMP: .omp.reduction.case1: -// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointoRERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointoRERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.case2: -// COMP-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 -// COMP-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// COMP-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointoRERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// COMP-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP33]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointoRERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP33]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.default: // COMP-NEXT: br label [[OMP_PRECOND_END]] @@ -2095,13 +2206,11 @@ // // // COMP-LABEL: define {{[^@]+}}@.omp_outlined..9 -// COMP-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED:%.*]], %struct.Point** nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// COMP-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // COMP-NEXT: entry: // COMP-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // COMP-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// COMP-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// COMP-NEXT: [[RED_ADDR:%.*]] = alloca %struct.Point*, align 8 -// COMP-NEXT: [[POINTS_ADDR:%.*]] = alloca %struct.Point**, align 8 +// COMP-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // COMP-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // COMP-NEXT: [[TMP:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2111,103 +2220,105 @@ // COMP-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// COMP-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// COMP-NEXT: [[I4:%.*]] = alloca i32, align 4 +// COMP-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// COMP-NEXT: [[I3:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // COMP-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// COMP-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// COMP-NEXT: store %struct.Point* [[RED]], %struct.Point** [[RED_ADDR]], align 8 -// COMP-NEXT: store %struct.Point** [[POINTS]], %struct.Point*** [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// COMP-NEXT: [[TMP1:%.*]] = load %struct.Point*, %struct.Point** [[RED_ADDR]], align 8 -// COMP-NEXT: [[TMP2:%.*]] = load %struct.Point**, %struct.Point*** [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// COMP-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// COMP-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// COMP-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// COMP-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// COMP-NEXT: [[TMP4:%.*]] = load %struct.Point*, %struct.Point** [[TMP3]], align 8 +// COMP-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// COMP-NEXT: [[TMP6:%.*]] = load %struct.Point**, %struct.Point*** [[TMP5]], align 8 +// COMP-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// COMP-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // COMP-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // COMP-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // COMP-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: store i32 0, i32* [[I]], align 4 -// COMP-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// COMP-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // COMP-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // COMP: omp.precond.then: // COMP-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// COMP-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // COMP-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // COMP-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// COMP-NEXT: call void @_ZN5PointC1Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// COMP-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// COMP-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// COMP-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// COMP-NEXT: call void @_ZN5PointC1Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// COMP-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// COMP-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// COMP-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// COMP-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // COMP: cond.true: -// COMP-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: br label [[COND_END:%.*]] // COMP: cond.false: -// COMP-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // COMP-NEXT: br label [[COND_END]] // COMP: cond.end: -// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // COMP-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// COMP-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// COMP-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // COMP: omp.inner.for.cond: -// COMP-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// COMP-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// COMP-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// COMP-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// COMP-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// COMP-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // COMP: omp.inner.for.body: -// COMP-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// COMP-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// COMP-NEXT: store i32 [[ADD7]], i32* [[I4]], align 4 -// COMP-NEXT: [[TMP17:%.*]] = load i32, i32* [[I4]], align 4 -// COMP-NEXT: [[TMP18:%.*]] = load %struct.Point*, %struct.Point** [[TMP2]], align 8 -// COMP-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], %struct.Point* [[TMP18]]) +// COMP-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// COMP-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// COMP-NEXT: store i32 [[ADD6]], i32* [[I3]], align 4 +// COMP-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// COMP-NEXT: [[TMP22:%.*]] = load %struct.Point*, %struct.Point** [[TMP6]], align 8 +// COMP-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], %struct.Point* [[TMP22]]) // COMP-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // COMP: omp.body.continue: // COMP-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // COMP: omp.inner.for.inc: -// COMP-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// COMP-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// COMP-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND]] // COMP: omp.inner.for.end: // COMP-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // COMP: omp.loop.exit: -// COMP-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// COMP-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// COMP-NEXT: [[TMP23:%.*]] = bitcast %struct.Point* [[RED3]] to i8* -// COMP-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 // COMP-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// COMP-NEXT: [[TMP26:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// COMP-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, i8* [[TMP26]], void (i8*, i8*)* @.omp.reduction.reduction_func.10, [8 x i32]* @.gomp_critical_user_.reduction.var) -// COMP-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// COMP-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) +// COMP-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// COMP-NEXT: [[TMP27:%.*]] = bitcast %struct.Point* [[RED]] to i8* +// COMP-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 +// COMP-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// COMP-NEXT: [[TMP30:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// COMP-NEXT: [[TMP31:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]], i32 1, i64 8, i8* [[TMP30]], void (i8*, i8*)* @.omp.reduction.reduction_func.10, [8 x i32]* @.gomp_critical_user_.reduction.var) +// COMP-NEXT: switch i32 [[TMP31]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // COMP-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // COMP-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // COMP-NEXT: ] // COMP: .omp.reduction.case1: -// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointeOERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointeOERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.case2: -// COMP-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 -// COMP-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// COMP-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointeOERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// COMP-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP33]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointeOERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP33]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.default: // COMP-NEXT: br label [[OMP_PRECOND_END]] @@ -2237,13 +2348,11 @@ // // // COMP-LABEL: define {{[^@]+}}@.omp_outlined..11 -// COMP-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED:%.*]], %struct.Point** nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// COMP-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // COMP-NEXT: entry: // COMP-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // COMP-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// COMP-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// COMP-NEXT: [[RED_ADDR:%.*]] = alloca %struct.Point*, align 8 -// COMP-NEXT: [[POINTS_ADDR:%.*]] = alloca %struct.Point**, align 8 +// COMP-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // COMP-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // COMP-NEXT: [[TMP:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2253,111 +2362,113 @@ // COMP-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// COMP-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// COMP-NEXT: [[I4:%.*]] = alloca i32, align 4 +// COMP-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// COMP-NEXT: [[I3:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // COMP-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4 -// COMP-NEXT: [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4 +// COMP-NEXT: [[REF_TMP9:%.*]] = alloca [[STRUCT_POINT]], align 4 // COMP-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// COMP-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// COMP-NEXT: store %struct.Point* [[RED]], %struct.Point** [[RED_ADDR]], align 8 -// COMP-NEXT: store %struct.Point** [[POINTS]], %struct.Point*** [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// COMP-NEXT: [[TMP1:%.*]] = load %struct.Point*, %struct.Point** [[RED_ADDR]], align 8 -// COMP-NEXT: [[TMP2:%.*]] = load %struct.Point**, %struct.Point*** [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// COMP-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// COMP-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// COMP-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// COMP-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// COMP-NEXT: [[TMP4:%.*]] = load %struct.Point*, %struct.Point** [[TMP3]], align 8 +// COMP-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// COMP-NEXT: [[TMP6:%.*]] = load %struct.Point**, %struct.Point*** [[TMP5]], align 8 +// COMP-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// COMP-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // COMP-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // COMP-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // COMP-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: store i32 0, i32* [[I]], align 4 -// COMP-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// COMP-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // COMP-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // COMP: omp.precond.then: // COMP-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// COMP-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // COMP-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // COMP-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// COMP-NEXT: call void @_ZN5PointC1Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// COMP-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// COMP-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// COMP-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// COMP-NEXT: call void @_ZN5PointC1Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// COMP-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// COMP-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// COMP-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// COMP-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // COMP: cond.true: -// COMP-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: br label [[COND_END:%.*]] // COMP: cond.false: -// COMP-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // COMP-NEXT: br label [[COND_END]] // COMP: cond.end: -// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // COMP-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// COMP-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// COMP-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // COMP: omp.inner.for.cond: -// COMP-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// COMP-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// COMP-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// COMP-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// COMP-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// COMP-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // COMP: omp.inner.for.body: -// COMP-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// COMP-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// COMP-NEXT: store i32 [[ADD7]], i32* [[I4]], align 4 -// COMP-NEXT: [[TMP17:%.*]] = load i32, i32* [[I4]], align 4 -// COMP-NEXT: [[TMP18:%.*]] = load %struct.Point*, %struct.Point** [[TMP2]], align 8 -// COMP-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], %struct.Point* [[TMP18]]) +// COMP-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// COMP-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// COMP-NEXT: store i32 [[ADD6]], i32* [[I3]], align 4 +// COMP-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// COMP-NEXT: [[TMP22:%.*]] = load %struct.Point*, %struct.Point** [[TMP6]], align 8 +// COMP-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], %struct.Point* [[TMP22]]) // COMP-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // COMP: omp.body.continue: // COMP-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // COMP: omp.inner.for.inc: -// COMP-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// COMP-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// COMP-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND]] // COMP: omp.inner.for.end: // COMP-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // COMP: omp.loop.exit: -// COMP-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// COMP-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// COMP-NEXT: [[TMP23:%.*]] = bitcast %struct.Point* [[RED3]] to i8* -// COMP-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 // COMP-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// COMP-NEXT: [[TMP26:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// COMP-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, i8* [[TMP26]], void (i8*, i8*)* @.omp.reduction.reduction_func.12, [8 x i32]* @.gomp_critical_user_.reduction.var) -// COMP-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// COMP-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) +// COMP-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// COMP-NEXT: [[TMP27:%.*]] = bitcast %struct.Point* [[RED]] to i8* +// COMP-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 +// COMP-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// COMP-NEXT: [[TMP30:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// COMP-NEXT: [[TMP31:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]], i32 1, i64 8, i8* [[TMP30]], void (i8*, i8*)* @.omp.reduction.reduction_func.12, [8 x i32]* @.gomp_critical_user_.reduction.var) +// COMP-NEXT: switch i32 [[TMP31]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // COMP-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // COMP-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // COMP-NEXT: ] // COMP: .omp.reduction.case1: -// COMP-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointaaERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: [[TMP28:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64* -// COMP-NEXT: store i64 [[CALL]], i64* [[TMP28]], align 4 -// COMP-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]]) -// COMP-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// COMP-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointaaERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: [[TMP32:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64* +// COMP-NEXT: store i64 [[CALL]], i64* [[TMP32]], align 4 +// COMP-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// COMP-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.case2: -// COMP-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// COMP-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// COMP-NEXT: [[CALL11:%.*]] = call i64 @_ZNK5PointaaERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: [[TMP31:%.*]] = bitcast %struct.Point* [[REF_TMP10]] to i64* -// COMP-NEXT: store i64 [[CALL11]], i64* [[TMP31]], align 4 -// COMP-NEXT: [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP10]]) -// COMP-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// COMP-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP34]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[CALL10:%.*]] = call i64 @_ZNK5PointaaERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: [[TMP35:%.*]] = bitcast %struct.Point* [[REF_TMP9]] to i64* +// COMP-NEXT: store i64 [[CALL10]], i64* [[TMP35]], align 4 +// COMP-NEXT: [[CALL11:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP9]]) +// COMP-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP34]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.default: // COMP-NEXT: br label [[OMP_PRECOND_END]] @@ -2391,13 +2502,11 @@ // // // COMP-LABEL: define {{[^@]+}}@.omp_outlined..13 -// COMP-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED:%.*]], %struct.Point** nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// COMP-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // COMP-NEXT: entry: // COMP-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // COMP-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// COMP-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// COMP-NEXT: [[RED_ADDR:%.*]] = alloca %struct.Point*, align 8 -// COMP-NEXT: [[POINTS_ADDR:%.*]] = alloca %struct.Point**, align 8 +// COMP-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // COMP-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // COMP-NEXT: [[TMP:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2407,111 +2516,113 @@ // COMP-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// COMP-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// COMP-NEXT: [[I4:%.*]] = alloca i32, align 4 +// COMP-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// COMP-NEXT: [[I3:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // COMP-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4 -// COMP-NEXT: [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4 +// COMP-NEXT: [[REF_TMP9:%.*]] = alloca [[STRUCT_POINT]], align 4 // COMP-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// COMP-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// COMP-NEXT: store %struct.Point* [[RED]], %struct.Point** [[RED_ADDR]], align 8 -// COMP-NEXT: store %struct.Point** [[POINTS]], %struct.Point*** [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// COMP-NEXT: [[TMP1:%.*]] = load %struct.Point*, %struct.Point** [[RED_ADDR]], align 8 -// COMP-NEXT: [[TMP2:%.*]] = load %struct.Point**, %struct.Point*** [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// COMP-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// COMP-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// COMP-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// COMP-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// COMP-NEXT: [[TMP4:%.*]] = load %struct.Point*, %struct.Point** [[TMP3]], align 8 +// COMP-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// COMP-NEXT: [[TMP6:%.*]] = load %struct.Point**, %struct.Point*** [[TMP5]], align 8 +// COMP-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// COMP-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // COMP-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // COMP-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // COMP-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: store i32 0, i32* [[I]], align 4 -// COMP-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// COMP-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // COMP-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // COMP: omp.precond.then: // COMP-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// COMP-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // COMP-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // COMP-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// COMP-NEXT: call void @_ZN5PointC1Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// COMP-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// COMP-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// COMP-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// COMP-NEXT: call void @_ZN5PointC1Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// COMP-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// COMP-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// COMP-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// COMP-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // COMP: cond.true: -// COMP-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: br label [[COND_END:%.*]] // COMP: cond.false: -// COMP-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // COMP-NEXT: br label [[COND_END]] // COMP: cond.end: -// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // COMP-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// COMP-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// COMP-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // COMP: omp.inner.for.cond: -// COMP-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// COMP-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// COMP-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// COMP-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// COMP-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// COMP-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // COMP: omp.inner.for.body: -// COMP-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// COMP-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// COMP-NEXT: store i32 [[ADD7]], i32* [[I4]], align 4 -// COMP-NEXT: [[TMP17:%.*]] = load i32, i32* [[I4]], align 4 -// COMP-NEXT: [[TMP18:%.*]] = load %struct.Point*, %struct.Point** [[TMP2]], align 8 -// COMP-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], %struct.Point* [[TMP18]]) +// COMP-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// COMP-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// COMP-NEXT: store i32 [[ADD6]], i32* [[I3]], align 4 +// COMP-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// COMP-NEXT: [[TMP22:%.*]] = load %struct.Point*, %struct.Point** [[TMP6]], align 8 +// COMP-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], %struct.Point* [[TMP22]]) // COMP-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // COMP: omp.body.continue: // COMP-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // COMP: omp.inner.for.inc: -// COMP-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// COMP-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// COMP-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND]] // COMP: omp.inner.for.end: // COMP-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // COMP: omp.loop.exit: -// COMP-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// COMP-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// COMP-NEXT: [[TMP23:%.*]] = bitcast %struct.Point* [[RED3]] to i8* -// COMP-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 // COMP-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// COMP-NEXT: [[TMP26:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// COMP-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, i8* [[TMP26]], void (i8*, i8*)* @.omp.reduction.reduction_func.14, [8 x i32]* @.gomp_critical_user_.reduction.var) -// COMP-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// COMP-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) +// COMP-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// COMP-NEXT: [[TMP27:%.*]] = bitcast %struct.Point* [[RED]] to i8* +// COMP-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 +// COMP-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// COMP-NEXT: [[TMP30:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// COMP-NEXT: [[TMP31:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]], i32 1, i64 8, i8* [[TMP30]], void (i8*, i8*)* @.omp.reduction.reduction_func.14, [8 x i32]* @.gomp_critical_user_.reduction.var) +// COMP-NEXT: switch i32 [[TMP31]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // COMP-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // COMP-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // COMP-NEXT: ] // COMP: .omp.reduction.case1: -// COMP-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointooERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: [[TMP28:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64* -// COMP-NEXT: store i64 [[CALL]], i64* [[TMP28]], align 4 -// COMP-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]]) -// COMP-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// COMP-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointooERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: [[TMP32:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64* +// COMP-NEXT: store i64 [[CALL]], i64* [[TMP32]], align 4 +// COMP-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// COMP-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.case2: -// COMP-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// COMP-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// COMP-NEXT: [[CALL11:%.*]] = call i64 @_ZNK5PointooERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: [[TMP31:%.*]] = bitcast %struct.Point* [[REF_TMP10]] to i64* -// COMP-NEXT: store i64 [[CALL11]], i64* [[TMP31]], align 4 -// COMP-NEXT: [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP10]]) -// COMP-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// COMP-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP34]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[CALL10:%.*]] = call i64 @_ZNK5PointooERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: [[TMP35:%.*]] = bitcast %struct.Point* [[REF_TMP9]] to i64* +// COMP-NEXT: store i64 [[CALL10]], i64* [[TMP35]], align 4 +// COMP-NEXT: [[CALL11:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[TMP4]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP9]]) +// COMP-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP34]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.default: // COMP-NEXT: br label [[OMP_PRECOND_END]] @@ -2556,3 +2667,190 @@ // COMP-NEXT: store i32 0, i32* [[Y]], align 4 // COMP-NEXT: ret void // +// +// SIMD-ONLY-LABEL: define {{[^@]+}}@_Z3fooiPK5Point +// SIMD-ONLY-SAME: (i32 [[N:%.*]], %struct.Point* [[POINTS:%.*]]) #[[ATTR0:[0-9]+]] { +// SIMD-ONLY-NEXT: entry: +// SIMD-ONLY-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY-NEXT: [[POINTS_ADDR:%.*]] = alloca %struct.Point*, align 8 +// SIMD-ONLY-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// SIMD-ONLY-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY-NEXT: [[I1:%.*]] = alloca i32, align 4 +// SIMD-ONLY-NEXT: [[I8:%.*]] = alloca i32, align 4 +// SIMD-ONLY-NEXT: [[I15:%.*]] = alloca i32, align 4 +// SIMD-ONLY-NEXT: [[I22:%.*]] = alloca i32, align 4 +// SIMD-ONLY-NEXT: [[I29:%.*]] = alloca i32, align 4 +// SIMD-ONLY-NEXT: [[I36:%.*]] = alloca i32, align 4 +// SIMD-ONLY-NEXT: [[I43:%.*]] = alloca i32, align 4 +// SIMD-ONLY-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY-NEXT: store %struct.Point* [[POINTS]], %struct.Point** [[POINTS_ADDR]], align 8 +// SIMD-ONLY-NEXT: call void @_ZN5PointC1Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR3:[0-9]+]] +// SIMD-ONLY-NEXT: store i32 0, i32* [[I]], align 4 +// SIMD-ONLY-NEXT: br label [[FOR_COND:%.*]] +// SIMD-ONLY: for.cond: +// SIMD-ONLY-NEXT: [[TMP0:%.*]] = load i32, i32* [[I]], align 4 +// SIMD-ONLY-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP0]], [[TMP1]] +// SIMD-ONLY-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// SIMD-ONLY: for.body: +// SIMD-ONLY-NEXT: [[TMP2:%.*]] = load i32, i32* [[I]], align 4 +// SIMD-ONLY-NEXT: [[TMP3:%.*]] = load %struct.Point*, %struct.Point** [[POINTS_ADDR]], align 8 +// SIMD-ONLY-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP2]], %struct.Point* [[TMP3]]) +// SIMD-ONLY-NEXT: br label [[FOR_INC:%.*]] +// SIMD-ONLY: for.inc: +// SIMD-ONLY-NEXT: [[TMP4:%.*]] = load i32, i32* [[I]], align 4 +// SIMD-ONLY-NEXT: [[INC:%.*]] = add i32 [[TMP4]], 1 +// SIMD-ONLY-NEXT: store i32 [[INC]], i32* [[I]], align 4 +// SIMD-ONLY-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]] +// SIMD-ONLY: for.end: +// SIMD-ONLY-NEXT: store i32 0, i32* [[I1]], align 4 +// SIMD-ONLY-NEXT: br label [[FOR_COND2:%.*]] +// SIMD-ONLY: for.cond2: +// SIMD-ONLY-NEXT: [[TMP5:%.*]] = load i32, i32* [[I1]], align 4 +// SIMD-ONLY-NEXT: [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY-NEXT: [[CMP3:%.*]] = icmp ult i32 [[TMP5]], [[TMP6]] +// SIMD-ONLY-NEXT: br i1 [[CMP3]], label [[FOR_BODY4:%.*]], label [[FOR_END7:%.*]] +// SIMD-ONLY: for.body4: +// SIMD-ONLY-NEXT: [[TMP7:%.*]] = load i32, i32* [[I1]], align 4 +// SIMD-ONLY-NEXT: [[TMP8:%.*]] = load %struct.Point*, %struct.Point** [[POINTS_ADDR]], align 8 +// SIMD-ONLY-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP7]], %struct.Point* [[TMP8]]) +// SIMD-ONLY-NEXT: br label [[FOR_INC5:%.*]] +// SIMD-ONLY: for.inc5: +// SIMD-ONLY-NEXT: [[TMP9:%.*]] = load i32, i32* [[I1]], align 4 +// SIMD-ONLY-NEXT: [[INC6:%.*]] = add i32 [[TMP9]], 1 +// SIMD-ONLY-NEXT: store i32 [[INC6]], i32* [[I1]], align 4 +// SIMD-ONLY-NEXT: br label [[FOR_COND2]], !llvm.loop [[LOOP4:![0-9]+]] +// SIMD-ONLY: for.end7: +// SIMD-ONLY-NEXT: store i32 0, i32* [[I8]], align 4 +// SIMD-ONLY-NEXT: br label [[FOR_COND9:%.*]] +// SIMD-ONLY: for.cond9: +// SIMD-ONLY-NEXT: [[TMP10:%.*]] = load i32, i32* [[I8]], align 4 +// SIMD-ONLY-NEXT: [[TMP11:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY-NEXT: [[CMP10:%.*]] = icmp ult i32 [[TMP10]], [[TMP11]] +// SIMD-ONLY-NEXT: br i1 [[CMP10]], label [[FOR_BODY11:%.*]], label [[FOR_END14:%.*]] +// SIMD-ONLY: for.body11: +// SIMD-ONLY-NEXT: [[TMP12:%.*]] = load i32, i32* [[I8]], align 4 +// SIMD-ONLY-NEXT: [[TMP13:%.*]] = load %struct.Point*, %struct.Point** [[POINTS_ADDR]], align 8 +// SIMD-ONLY-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP12]], %struct.Point* [[TMP13]]) +// SIMD-ONLY-NEXT: br label [[FOR_INC12:%.*]] +// SIMD-ONLY: for.inc12: +// SIMD-ONLY-NEXT: [[TMP14:%.*]] = load i32, i32* [[I8]], align 4 +// SIMD-ONLY-NEXT: [[INC13:%.*]] = add i32 [[TMP14]], 1 +// SIMD-ONLY-NEXT: store i32 [[INC13]], i32* [[I8]], align 4 +// SIMD-ONLY-NEXT: br label [[FOR_COND9]], !llvm.loop [[LOOP5:![0-9]+]] +// SIMD-ONLY: for.end14: +// SIMD-ONLY-NEXT: store i32 0, i32* [[I15]], align 4 +// SIMD-ONLY-NEXT: br label [[FOR_COND16:%.*]] +// SIMD-ONLY: for.cond16: +// SIMD-ONLY-NEXT: [[TMP15:%.*]] = load i32, i32* [[I15]], align 4 +// SIMD-ONLY-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP15]], [[TMP16]] +// SIMD-ONLY-NEXT: br i1 [[CMP17]], label [[FOR_BODY18:%.*]], label [[FOR_END21:%.*]] +// SIMD-ONLY: for.body18: +// SIMD-ONLY-NEXT: [[TMP17:%.*]] = load i32, i32* [[I15]], align 4 +// SIMD-ONLY-NEXT: [[TMP18:%.*]] = load %struct.Point*, %struct.Point** [[POINTS_ADDR]], align 8 +// SIMD-ONLY-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP17]], %struct.Point* [[TMP18]]) +// SIMD-ONLY-NEXT: br label [[FOR_INC19:%.*]] +// SIMD-ONLY: for.inc19: +// SIMD-ONLY-NEXT: [[TMP19:%.*]] = load i32, i32* [[I15]], align 4 +// SIMD-ONLY-NEXT: [[INC20:%.*]] = add i32 [[TMP19]], 1 +// SIMD-ONLY-NEXT: store i32 [[INC20]], i32* [[I15]], align 4 +// SIMD-ONLY-NEXT: br label [[FOR_COND16]], !llvm.loop [[LOOP6:![0-9]+]] +// SIMD-ONLY: for.end21: +// SIMD-ONLY-NEXT: store i32 0, i32* [[I22]], align 4 +// SIMD-ONLY-NEXT: br label [[FOR_COND23:%.*]] +// SIMD-ONLY: for.cond23: +// SIMD-ONLY-NEXT: [[TMP20:%.*]] = load i32, i32* [[I22]], align 4 +// SIMD-ONLY-NEXT: [[TMP21:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY-NEXT: [[CMP24:%.*]] = icmp ult i32 [[TMP20]], [[TMP21]] +// SIMD-ONLY-NEXT: br i1 [[CMP24]], label [[FOR_BODY25:%.*]], label [[FOR_END28:%.*]] +// SIMD-ONLY: for.body25: +// SIMD-ONLY-NEXT: [[TMP22:%.*]] = load i32, i32* [[I22]], align 4 +// SIMD-ONLY-NEXT: [[TMP23:%.*]] = load %struct.Point*, %struct.Point** [[POINTS_ADDR]], align 8 +// SIMD-ONLY-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP22]], %struct.Point* [[TMP23]]) +// SIMD-ONLY-NEXT: br label [[FOR_INC26:%.*]] +// SIMD-ONLY: for.inc26: +// SIMD-ONLY-NEXT: [[TMP24:%.*]] = load i32, i32* [[I22]], align 4 +// SIMD-ONLY-NEXT: [[INC27:%.*]] = add i32 [[TMP24]], 1 +// SIMD-ONLY-NEXT: store i32 [[INC27]], i32* [[I22]], align 4 +// SIMD-ONLY-NEXT: br label [[FOR_COND23]], !llvm.loop [[LOOP7:![0-9]+]] +// SIMD-ONLY: for.end28: +// SIMD-ONLY-NEXT: store i32 0, i32* [[I29]], align 4 +// SIMD-ONLY-NEXT: br label [[FOR_COND30:%.*]] +// SIMD-ONLY: for.cond30: +// SIMD-ONLY-NEXT: [[TMP25:%.*]] = load i32, i32* [[I29]], align 4 +// SIMD-ONLY-NEXT: [[TMP26:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY-NEXT: [[CMP31:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]] +// SIMD-ONLY-NEXT: br i1 [[CMP31]], label [[FOR_BODY32:%.*]], label [[FOR_END35:%.*]] +// SIMD-ONLY: for.body32: +// SIMD-ONLY-NEXT: [[TMP27:%.*]] = load i32, i32* [[I29]], align 4 +// SIMD-ONLY-NEXT: [[TMP28:%.*]] = load %struct.Point*, %struct.Point** [[POINTS_ADDR]], align 8 +// SIMD-ONLY-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP27]], %struct.Point* [[TMP28]]) +// SIMD-ONLY-NEXT: br label [[FOR_INC33:%.*]] +// SIMD-ONLY: for.inc33: +// SIMD-ONLY-NEXT: [[TMP29:%.*]] = load i32, i32* [[I29]], align 4 +// SIMD-ONLY-NEXT: [[INC34:%.*]] = add i32 [[TMP29]], 1 +// SIMD-ONLY-NEXT: store i32 [[INC34]], i32* [[I29]], align 4 +// SIMD-ONLY-NEXT: br label [[FOR_COND30]], !llvm.loop [[LOOP8:![0-9]+]] +// SIMD-ONLY: for.end35: +// SIMD-ONLY-NEXT: store i32 0, i32* [[I36]], align 4 +// SIMD-ONLY-NEXT: br label [[FOR_COND37:%.*]] +// SIMD-ONLY: for.cond37: +// SIMD-ONLY-NEXT: [[TMP30:%.*]] = load i32, i32* [[I36]], align 4 +// SIMD-ONLY-NEXT: [[TMP31:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY-NEXT: [[CMP38:%.*]] = icmp ult i32 [[TMP30]], [[TMP31]] +// SIMD-ONLY-NEXT: br i1 [[CMP38]], label [[FOR_BODY39:%.*]], label [[FOR_END42:%.*]] +// SIMD-ONLY: for.body39: +// SIMD-ONLY-NEXT: [[TMP32:%.*]] = load i32, i32* [[I36]], align 4 +// SIMD-ONLY-NEXT: [[TMP33:%.*]] = load %struct.Point*, %struct.Point** [[POINTS_ADDR]], align 8 +// SIMD-ONLY-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP32]], %struct.Point* [[TMP33]]) +// SIMD-ONLY-NEXT: br label [[FOR_INC40:%.*]] +// SIMD-ONLY: for.inc40: +// SIMD-ONLY-NEXT: [[TMP34:%.*]] = load i32, i32* [[I36]], align 4 +// SIMD-ONLY-NEXT: [[INC41:%.*]] = add i32 [[TMP34]], 1 +// SIMD-ONLY-NEXT: store i32 [[INC41]], i32* [[I36]], align 4 +// SIMD-ONLY-NEXT: br label [[FOR_COND37]], !llvm.loop [[LOOP9:![0-9]+]] +// SIMD-ONLY: for.end42: +// SIMD-ONLY-NEXT: store i32 0, i32* [[I43]], align 4 +// SIMD-ONLY-NEXT: br label [[FOR_COND44:%.*]] +// SIMD-ONLY: for.cond44: +// SIMD-ONLY-NEXT: [[TMP35:%.*]] = load i32, i32* [[I43]], align 4 +// SIMD-ONLY-NEXT: [[TMP36:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY-NEXT: [[CMP45:%.*]] = icmp ult i32 [[TMP35]], [[TMP36]] +// SIMD-ONLY-NEXT: br i1 [[CMP45]], label [[FOR_BODY46:%.*]], label [[FOR_END49:%.*]] +// SIMD-ONLY: for.body46: +// SIMD-ONLY-NEXT: [[TMP37:%.*]] = load i32, i32* [[I43]], align 4 +// SIMD-ONLY-NEXT: [[TMP38:%.*]] = load %struct.Point*, %struct.Point** [[POINTS_ADDR]], align 8 +// SIMD-ONLY-NEXT: call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP37]], %struct.Point* [[TMP38]]) +// SIMD-ONLY-NEXT: br label [[FOR_INC47:%.*]] +// SIMD-ONLY: for.inc47: +// SIMD-ONLY-NEXT: [[TMP39:%.*]] = load i32, i32* [[I43]], align 4 +// SIMD-ONLY-NEXT: [[INC48:%.*]] = add i32 [[TMP39]], 1 +// SIMD-ONLY-NEXT: store i32 [[INC48]], i32* [[I43]], align 4 +// SIMD-ONLY-NEXT: br label [[FOR_COND44]], !llvm.loop [[LOOP10:![0-9]+]] +// SIMD-ONLY: for.end49: +// SIMD-ONLY-NEXT: ret void +// +// +// SIMD-ONLY-LABEL: define {{[^@]+}}@_ZN5PointC1Ev +// SIMD-ONLY-SAME: (%struct.Point* nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 { +// SIMD-ONLY-NEXT: entry: +// SIMD-ONLY-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.Point*, align 8 +// SIMD-ONLY-NEXT: store %struct.Point* [[THIS]], %struct.Point** [[THIS_ADDR]], align 8 +// SIMD-ONLY-NEXT: [[THIS1:%.*]] = load %struct.Point*, %struct.Point** [[THIS_ADDR]], align 8 +// SIMD-ONLY-NEXT: call void @_ZN5PointC2Ev(%struct.Point* nonnull align 4 dereferenceable(8) [[THIS1]]) #[[ATTR3]] +// SIMD-ONLY-NEXT: ret void +// +// +// SIMD-ONLY-LABEL: define {{[^@]+}}@_ZN5PointC2Ev +// SIMD-ONLY-SAME: (%struct.Point* nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// SIMD-ONLY-NEXT: entry: +// SIMD-ONLY-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.Point*, align 8 +// SIMD-ONLY-NEXT: store %struct.Point* [[THIS]], %struct.Point** [[THIS_ADDR]], align 8 +// SIMD-ONLY-NEXT: [[THIS1:%.*]] = load %struct.Point*, %struct.Point** [[THIS_ADDR]], align 8 +// SIMD-ONLY-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_POINT:%.*]], %struct.Point* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY-NEXT: store i32 0, i32* [[X]], align 4 +// SIMD-ONLY-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_POINT]], %struct.Point* [[THIS1]], i32 0, i32 1 +// SIMD-ONLY-NEXT: store i32 0, i32* [[Y]], align 4 +// SIMD-ONLY-NEXT: ret void +// diff --git a/clang/test/OpenMP/reduction_implicit_map.cpp b/clang/test/OpenMP/reduction_implicit_map.cpp --- a/clang/test/OpenMP/reduction_implicit_map.cpp +++ b/clang/test/OpenMP/reduction_implicit_map.cpp @@ -101,19 +101,25 @@ // CHECK-SAME: (double* noundef [[E:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[E_ADDR:%.*]] = alloca double*, align 8 -// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK-NEXT: store double* [[E]], double** [[E_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK: user_code.entry: // CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) -// CHECK-NEXT: [[TMP2:%.*]] = load double*, double** [[E_ADDR]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP2]] to i8* -// CHECK-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 8 -// CHECK-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, double*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP5]], i64 1) +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-NEXT: [[TMP3:%.*]] = load double*, double** [[E_ADDR]], align 8 +// CHECK-NEXT: store double* [[TMP3]], double** [[TMP2]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = bitcast %struct.anon* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-NEXT: [[TMP5:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP4]], i8* [[TMP5]]) +// CHECK-NEXT: [[TMP7:%.*]] = load [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to %struct.anon* +// CHECK-NEXT: store [[STRUCT_ANON]] [[TMP7]], %struct.anon* [[TMP8]], align 8 +// CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon*)* @__omp_outlined__ to i8*), i8* null, i8* [[TMP6]]) +// CHECK-NEXT: call void @__kmpc_free_shared(i8* [[TMP5]], i64 8) // CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK-NEXT: ret void // CHECK: worker.exit: @@ -121,46 +127,48 @@ // // // CHECK-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef [[E:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-NEXT: [[E_ADDR:%.*]] = alloca double*, align 8 -// CHECK-NEXT: [[E2:%.*]] = alloca double, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK-NEXT: [[E:%.*]] = alloca double, align 8 // CHECK-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-NEXT: store double* [[E]], double** [[E_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 8 -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP0]], i64 0 -// CHECK-NEXT: [[TMP1:%.*]] = load double*, double** [[E_ADDR]], align 8 -// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP1]], i64 0 -// CHECK-NEXT: store double 0.000000e+00, double* [[E2]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load double*, double** [[E_ADDR]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = ptrtoint double* [[TMP2]] to i64 -// CHECK-NEXT: [[TMP4:%.*]] = ptrtoint double* [[ARRAYIDX]] to i64 -// CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]] -// CHECK-NEXT: [[TMP6:%.*]] = sdiv exact i64 [[TMP5]], ptrtoint (double* getelementptr (double, double* null, i32 1) to i64) -// CHECK-NEXT: [[TMP7:%.*]] = getelementptr double, double* [[E2]], i64 [[TMP6]] -// CHECK-NEXT: store double* [[TMP7]], double** [[TMP]], align 8 -// CHECK-NEXT: [[TMP8:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK-NEXT: store double 1.000000e+01, double* [[TMP8]], align 8 -// CHECK-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK-NEXT: [[TMP12:%.*]] = bitcast double* [[E2]] to i8* -// CHECK-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8 -// CHECK-NEXT: [[TMP13:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 1, i64 8, i8* [[TMP13]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func) -// CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 1 -// CHECK-NEXT: br i1 [[TMP15]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP1]], align 8 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 0 +// CHECK-NEXT: [[TMP3:%.*]] = load double*, double** [[TMP1]], align 8 +// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP3]], i64 0 +// CHECK-NEXT: store double 0.000000e+00, double* [[E]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP1]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = ptrtoint double* [[TMP4]] to i64 +// CHECK-NEXT: [[TMP6:%.*]] = ptrtoint double* [[ARRAYIDX]] to i64 +// CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP5]], [[TMP6]] +// CHECK-NEXT: [[TMP8:%.*]] = sdiv exact i64 [[TMP7]], ptrtoint (double* getelementptr (double, double* null, i32 1) to i64) +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr double, double* [[E]], i64 [[TMP8]] +// CHECK-NEXT: store double* [[TMP9]], double** [[TMP]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK-NEXT: store double 1.000000e+01, double* [[TMP10]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK-NEXT: [[TMP14:%.*]] = bitcast double* [[E]] to i8* +// CHECK-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 +// CHECK-NEXT: [[TMP15:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 1, i64 8, i8* [[TMP15]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func) +// CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 1 +// CHECK-NEXT: br i1 [[TMP17]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK: .omp.reduction.then: -// CHECK-NEXT: [[TMP16:%.*]] = load double, double* [[ARRAYIDX]], align 8 -// CHECK-NEXT: [[TMP17:%.*]] = load double, double* [[E2]], align 8 -// CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP16]], [[TMP17]] +// CHECK-NEXT: [[TMP18:%.*]] = load double, double* [[ARRAYIDX]], align 8 +// CHECK-NEXT: [[TMP19:%.*]] = load double, double* [[E]], align 8 +// CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP18]], [[TMP19]] // CHECK-NEXT: store double [[ADD]], double* [[ARRAYIDX]], align 8 -// CHECK-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP10]]) +// CHECK-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP12]]) // CHECK-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK: .omp.reduction.done: // CHECK-NEXT: ret void @@ -376,75 +384,80 @@ // CHECK1-SAME: ([5 x %class.S2]* noundef nonnull align 4 dereferenceable(20) [[O:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[O_ADDR:%.*]] = alloca [5 x %class.S2]*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store [5 x %class.S2]* [[O]], [5 x %class.S2]** [[O_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load [5 x %class.S2]*, [5 x %class.S2]** [[O_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [5 x %class.S2]*)* @.omp_outlined. to void (i32*, i32*, ...)*), [5 x %class.S2]* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store [5 x %class.S2]* [[TMP0]], [5 x %class.S2]** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [5 x %class.S2]* noundef nonnull align 4 dereferenceable(20) [[O:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[O_ADDR:%.*]] = alloca [5 x %class.S2]*, align 8 -// CHECK1-NEXT: [[O1:%.*]] = alloca [[CLASS_S2:%.*]], align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[O:%.*]] = alloca [[CLASS_S2:%.*]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [5 x %class.S2]* [[O]], [5 x %class.S2]** [[O_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [5 x %class.S2]*, [5 x %class.S2]** [[O_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x %class.S2], [5 x %class.S2]* [[TMP0]], i64 0, i64 0 -// CHECK1-NEXT: call void @_ZN2S2C1Ev(%class.S2* noundef nonnull align 4 dereferenceable(4) [[O1]]) -// CHECK1-NEXT: [[TMP1:%.*]] = bitcast [5 x %class.S2]* [[TMP0]] to %class.S2* -// CHECK1-NEXT: [[TMP2:%.*]] = ptrtoint %class.S2* [[TMP1]] to i64 -// CHECK1-NEXT: [[TMP3:%.*]] = ptrtoint %class.S2* [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]] -// CHECK1-NEXT: [[TMP5:%.*]] = sdiv exact i64 [[TMP4]], ptrtoint (%class.S2* getelementptr ([[CLASS_S2]], %class.S2* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[CLASS_S2]], %class.S2* [[O1]], i64 [[TMP5]] -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast %class.S2* [[TMP6]] to [5 x %class.S2]* +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [5 x %class.S2]*, [5 x %class.S2]** [[TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x %class.S2], [5 x %class.S2]* [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: call void @_ZN2S2C1Ev(%class.S2* noundef nonnull align 4 dereferenceable(4) [[O]]) +// CHECK1-NEXT: [[TMP3:%.*]] = bitcast [5 x %class.S2]* [[TMP2]] to %class.S2* +// CHECK1-NEXT: [[TMP4:%.*]] = ptrtoint %class.S2* [[TMP3]] to i64 +// CHECK1-NEXT: [[TMP5:%.*]] = ptrtoint %class.S2* [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: [[TMP7:%.*]] = sdiv exact i64 [[TMP6]], ptrtoint (%class.S2* getelementptr ([[CLASS_S2]], %class.S2* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr [[CLASS_S2]], %class.S2* [[O]], i64 [[TMP7]] +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %class.S2* [[TMP8]] to [5 x %class.S2]* // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 // CHECK1-NEXT: br label [[FOR_COND:%.*]] // CHECK1: for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP8]], 10 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP10]], 10 // CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] // CHECK1: for.body: // CHECK1-NEXT: br label [[FOR_INC:%.*]] // CHECK1: for.inc: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK1-NEXT: store i32 [[INC]], i32* [[I]], align 4 // CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: for.end: -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast %class.S2* [[O1]] to i8* -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 1, i64 8, i8* [[TMP14]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast %class.S2* [[O]] to i8* +// CHECK1-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP15]], i32 1, i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %class.S2* @_ZN2S2plERS_(%class.S2* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX]], %class.S2* noundef nonnull align 4 dereferenceable(4) [[O1]]) -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast %class.S2* [[ARRAYIDX]] to i8* -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast %class.S2* [[CALL]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP16]], i8* align 4 [[TMP17]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP13]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %class.S2* @_ZN2S2plERS_(%class.S2* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX]], %class.S2* noundef nonnull align 4 dereferenceable(4) [[O]]) +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast %class.S2* [[ARRAYIDX]] to i8* +// CHECK1-NEXT: [[TMP19:%.*]] = bitcast %class.S2* [[CALL]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP19]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) %class.S2* @_ZN2S2plERS_(%class.S2* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX]], %class.S2* noundef nonnull align 4 dereferenceable(4) [[O1]]) -// CHECK1-NEXT: [[TMP20:%.*]] = bitcast %class.S2* [[ARRAYIDX]] to i8* -// CHECK1-NEXT: [[TMP21:%.*]] = bitcast %class.S2* [[CALL2]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP19]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP21]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL1:%.*]] = call noundef nonnull align 4 dereferenceable(4) %class.S2* @_ZN2S2plERS_(%class.S2* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX]], %class.S2* noundef nonnull align 4 dereferenceable(4) [[O]]) +// CHECK1-NEXT: [[TMP22:%.*]] = bitcast %class.S2* [[ARRAYIDX]] to i8* +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast %class.S2* [[CALL1]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP21]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -478,18 +491,21 @@ // CHECK1-SAME: ([10 x [10 x [10 x double]]]* noundef nonnull align 8 dereferenceable(8000) [[B:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x [10 x [10 x double]]]*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store [10 x [10 x [10 x double]]]* [[B]], [10 x [10 x [10 x double]]]** [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x double]]]*, [10 x [10 x [10 x double]]]** [[B_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x [10 x [10 x double]]]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [10 x [10 x [10 x double]]]* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store [10 x [10 x [10 x double]]]* [[TMP0]], [10 x [10 x [10 x double]]]** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [10 x [10 x double]]]* noundef nonnull align 8 dereferenceable(8000) [[B:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x [10 x [10 x double]]]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -504,183 +520,185 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [10 x [10 x [10 x double]]]* [[B]], [10 x [10 x [10 x double]]]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x double]]]*, [10 x [10 x [10 x double]]]** [[B_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x [10 x double]]]*, [10 x [10 x [10 x double]]]** [[TMP1]], align 8 // CHECK1-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 9, i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], [10 x [10 x [10 x double]]]* [[TMP0]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], [10 x [10 x [10 x double]]]* [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x [10 x double]], [10 x [10 x double]]* [[ARRAYIDX]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYDECAY]], i64 2 // CHECK1-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX1]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[ARRAYDECAY2]], i64 1 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], [10 x [10 x [10 x double]]]* [[TMP0]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], [10 x [10 x [10 x double]]]* [[TMP2]], i64 0, i64 1 // CHECK1-NEXT: [[ARRAYDECAY5:%.*]] = getelementptr inbounds [10 x [10 x double]], [10 x [10 x double]]* [[ARRAYIDX4]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYDECAY5]], i64 5 // CHECK1-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX6]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[ARRAYDECAY7]], i64 1 -// CHECK1-NEXT: [[TMP1:%.*]] = ptrtoint double* [[ARRAYIDX8]] to i64 -// CHECK1-NEXT: [[TMP2:%.*]] = ptrtoint double* [[ARRAYIDX3]] to i64 -// CHECK1-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], [[TMP2]] -// CHECK1-NEXT: [[TMP4:%.*]] = sdiv exact i64 [[TMP3]], ptrtoint (double* getelementptr (double, double* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP5:%.*]] = add nuw i64 [[TMP4]], 1 -// CHECK1-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], ptrtoint (double* getelementptr (double, double* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP7:%.*]] = call i8* @llvm.stacksave() -// CHECK1-NEXT: store i8* [[TMP7]], i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca double, i64 [[TMP5]], align 8 -// CHECK1-NEXT: store i64 [[TMP5]], i64* [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr double, double* [[VLA]], i64 [[TMP5]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq double* [[VLA]], [[TMP8]] +// CHECK1-NEXT: [[TMP3:%.*]] = ptrtoint double* [[ARRAYIDX8]] to i64 +// CHECK1-NEXT: [[TMP4:%.*]] = ptrtoint double* [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]] +// CHECK1-NEXT: [[TMP6:%.*]] = sdiv exact i64 [[TMP5]], ptrtoint (double* getelementptr (double, double* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP7:%.*]] = add nuw i64 [[TMP6]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], ptrtoint (double* getelementptr (double, double* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP9:%.*]] = call i8* @llvm.stacksave() +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca double, i64 [[TMP7]], align 8 +// CHECK1-NEXT: store i64 [[TMP7]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr double, double* [[VLA]], i64 [[TMP7]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq double* [[VLA]], [[TMP10]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi double* [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store double 0.000000e+00, double* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr double, double* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq double* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq double* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP10]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [10 x [10 x [10 x double]]]* [[TMP0]] to double* -// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint double* [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint double* [[ARRAYIDX3]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (double* getelementptr (double, double* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr double, double* [[VLA]], i64 [[TMP13]] -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP14]] to [10 x [10 x [10 x double]]]* +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [10 x [10 x [10 x double]]]* [[TMP2]] to double* +// CHECK1-NEXT: [[TMP12:%.*]] = ptrtoint double* [[TMP11]] to i64 +// CHECK1-NEXT: [[TMP13:%.*]] = ptrtoint double* [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP14:%.*]] = sub i64 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = sdiv exact i64 [[TMP14]], ptrtoint (double* getelementptr (double, double* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr double, double* [[VLA]], i64 [[TMP15]] +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast double* [[TMP16]] to [10 x [10 x [10 x double]]]* // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [1 x %struct.kmp_taskred_input_t], [1 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], [10 x [10 x [10 x double]]]* [[TMP0]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], [10 x [10 x [10 x double]]]* [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [10 x [10 x double]], [10 x [10 x double]]* [[ARRAYIDX9]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYDECAY10]], i64 2 // CHECK1-NEXT: [[ARRAYDECAY12:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX11]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[ARRAYDECAY12]], i64 1 -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], [10 x [10 x [10 x double]]]* [[TMP0]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], [10 x [10 x [10 x double]]]* [[TMP2]], i64 0, i64 1 // CHECK1-NEXT: [[ARRAYDECAY15:%.*]] = getelementptr inbounds [10 x [10 x double]], [10 x [10 x double]]* [[ARRAYIDX14]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYDECAY15]], i64 5 // CHECK1-NEXT: [[ARRAYDECAY17:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX16]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYDECAY17]], i64 1 -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast double* [[VLA]] to i8* -// CHECK1-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP19:%.*]] = bitcast double* [[ARRAYIDX13]] to i8* +// CHECK1-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA]] to i8* // CHECK1-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint double* [[ARRAYIDX18]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint double* [[ARRAYIDX13]] to i64 -// CHECK1-NEXT: [[TMP22:%.*]] = sub i64 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: [[TMP23:%.*]] = sdiv exact i64 [[TMP22]], ptrtoint (double* getelementptr (double, double* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP24:%.*]] = add nuw i64 [[TMP23]], 1 -// CHECK1-NEXT: [[TMP25:%.*]] = mul nuw i64 [[TMP24]], ptrtoint (double* getelementptr (double, double* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP25]], i64* [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init. to i8*), i8** [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store i8* null, i8** [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb. to i8*), i8** [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: store i32 1, i32* [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = bitcast [1 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]] to i8* -// CHECK1-NEXT: [[TMP34:%.*]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @[[GLOB2]], i32 [[TMP32]], i32 1, i32 1, i8* [[TMP33]]) -// CHECK1-NEXT: store i8* [[TMP34]], i8** [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP36]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP37:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP37]], 9 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP21:%.*]] = bitcast double* [[ARRAYIDX13]] to i8* +// CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = ptrtoint double* [[ARRAYIDX18]] to i64 +// CHECK1-NEXT: [[TMP23:%.*]] = ptrtoint double* [[ARRAYIDX13]] to i64 +// CHECK1-NEXT: [[TMP24:%.*]] = sub i64 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: [[TMP25:%.*]] = sdiv exact i64 [[TMP24]], ptrtoint (double* getelementptr (double, double* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP26:%.*]] = add nuw i64 [[TMP25]], 1 +// CHECK1-NEXT: [[TMP27:%.*]] = mul nuw i64 [[TMP26]], ptrtoint (double* getelementptr (double, double* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP27]], i64* [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init. to i8*), i8** [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store i8* null, i8** [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb. to i8*), i8** [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: store i32 1, i32* [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = bitcast [1 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]] to i8* +// CHECK1-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @[[GLOB2]], i32 [[TMP34]], i32 1, i32 1, i8* [[TMP35]]) +// CHECK1-NEXT: store i8* [[TMP36]], i8** [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP38]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP39:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP39]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP38:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP38]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP40]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP39]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP41]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP40:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP19:%.*]] = icmp sle i64 [[TMP40]], [[TMP41]] +// CHECK1-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP19:%.*]] = icmp sle i64 [[TMP42]], [[TMP43]] // CHECK1-NEXT: br i1 [[CMP19]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP42]], 1 +// CHECK1-NEXT: [[TMP44:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP44]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL]] // CHECK1-NEXT: store i64 [[ADD]], i64* [[I]], align 8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP43:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP43]], 1 +// CHECK1-NEXT: [[TMP45:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP45]], 1 // CHECK1-NEXT: store i64 [[ADD20]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP45]]) // CHECK1-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP47]], i32 1) -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP49:%.*]] = bitcast double* [[VLA]] to i8* -// CHECK1-NEXT: store i8* [[TMP49]], i8** [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP51:%.*]] = inttoptr i64 [[TMP5]] to i8* +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP47]]) +// CHECK1-NEXT: [[TMP48:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[TMP48]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP49]], i32 1) +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP51:%.*]] = bitcast double* [[VLA]] to i8* // CHECK1-NEXT: store i8* [[TMP51]], i8** [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP53:%.*]] = load i32, i32* [[TMP52]], align 4 -// CHECK1-NEXT: [[TMP54:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP55:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP53]], i32 1, i64 16, i8* [[TMP54]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP55]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP53:%.*]] = inttoptr i64 [[TMP7]] to i8* +// CHECK1-NEXT: store i8* [[TMP53]], i8** [[TMP52]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, i32* [[TMP54]], align 4 +// CHECK1-NEXT: [[TMP56:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP57:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP55]], i32 1, i64 16, i8* [[TMP56]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP57]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr double, double* [[ARRAYIDX3]], i64 [[TMP5]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq double* [[ARRAYIDX3]], [[TMP56]] +// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr double, double* [[ARRAYIDX3]], i64 [[TMP7]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq double* [[ARRAYIDX3]], [[TMP58]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE25:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi double* [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST21:%.*]] = phi double* [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP57:%.*]] = load double, double* [[OMP_ARRAYCPY_DESTELEMENTPAST21]], align 8 -// CHECK1-NEXT: [[TMP58:%.*]] = load double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 8 -// CHECK1-NEXT: [[ADD22:%.*]] = fadd double [[TMP57]], [[TMP58]] +// CHECK1-NEXT: [[TMP59:%.*]] = load double, double* [[OMP_ARRAYCPY_DESTELEMENTPAST21]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 8 +// CHECK1-NEXT: [[ADD22:%.*]] = fadd double [[TMP59]], [[TMP60]] // CHECK1-NEXT: store double [[ADD22]], double* [[OMP_ARRAYCPY_DESTELEMENTPAST21]], align 8 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT23]] = getelementptr double, double* [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE24:%.*]] = icmp eq double* [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP56]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE24:%.*]] = icmp eq double* [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP58]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE24]], label [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done25: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP53]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP55]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr double, double* [[ARRAYIDX3]], i64 [[TMP5]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY26:%.*]] = icmp eq double* [[ARRAYIDX3]], [[TMP59]] +// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr double, double* [[ARRAYIDX3]], i64 [[TMP7]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY26:%.*]] = icmp eq double* [[ARRAYIDX3]], [[TMP61]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY26]], label [[OMP_ARRAYCPY_DONE33:%.*]], label [[OMP_ARRAYCPY_BODY27:%.*]] // CHECK1: omp.arraycpy.body27: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST28:%.*]] = phi double* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT31:%.*]], [[OMP_ARRAYCPY_BODY27]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST29:%.*]] = phi double* [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT30:%.*]], [[OMP_ARRAYCPY_BODY27]] ] -// CHECK1-NEXT: [[TMP60:%.*]] = load double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST28]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = atomicrmw fadd double* [[OMP_ARRAYCPY_DESTELEMENTPAST29]], double [[TMP60]] monotonic, align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = load double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST28]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = atomicrmw fadd double* [[OMP_ARRAYCPY_DESTELEMENTPAST29]], double [[TMP62]] monotonic, align 8 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT30]] = getelementptr double, double* [[OMP_ARRAYCPY_DESTELEMENTPAST29]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT31]] = getelementptr double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST28]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE32:%.*]] = icmp eq double* [[OMP_ARRAYCPY_DEST_ELEMENT30]], [[TMP59]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE32:%.*]] = icmp eq double* [[OMP_ARRAYCPY_DEST_ELEMENT30]], [[TMP61]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE32]], label [[OMP_ARRAYCPY_DONE33]], label [[OMP_ARRAYCPY_BODY27]] // CHECK1: omp.arraycpy.done33: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP62:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP62]]) +// CHECK1-NEXT: [[TMP64:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP64]]) // CHECK1-NEXT: ret void // // @@ -1031,157 +1049,177 @@ // CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK2-NEXT: store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4 // CHECK2-NEXT: store i32* [[OUTPUT]], i32** [[OUTPUT_ADDR]], align 4 // CHECK2-NEXT: store i32* [[INPUT]], i32** [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP0]], i32* [[SIZE_CASTED]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[SIZE_CASTED]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32 [[TMP1]], i32* [[TMP2]], i32* [[TMP3]]) +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4 +// CHECK2-NEXT: store i32* [[TMP3]], i32** [[TMP2]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[INPUT_ADDR]], align 4 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[TMP4]], align 4 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[SIZE:%.*]], i32* noundef [[OUTPUT:%.*]], i32* noundef [[INPUT:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[OUTPUT1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK2-NEXT: [[SIZE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32* [[OUTPUT]], i32** [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: store i32* [[INPUT]], i32** [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 0 -// CHECK2-NEXT: store i32 0, i32* [[OUTPUT1]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = ptrtoint i32* [[TMP1]] to i64 -// CHECK2-NEXT: [[TMP3:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64 -// CHECK2-NEXT: [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]] -// CHECK2-NEXT: [[TMP5:%.*]] = sdiv exact i64 [[TMP4]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64) -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr i32, i32* [[OUTPUT1]], i64 [[TMP5]] -// CHECK2-NEXT: store i32* [[TMP6]], i32** [[TMP]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 +// CHECK2-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[SIZE]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0 +// CHECK2-NEXT: store i32 0, i32* [[OUTPUT]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = ptrtoint i32* [[TMP6]] to i64 +// CHECK2-NEXT: [[TMP8:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64 +// CHECK2-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64) +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[OUTPUT]], i64 [[TMP10]] +// CHECK2-NEXT: store i32* [[TMP11]], i32** [[TMP]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[SIZE]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK2-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] -// CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP17]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP22]], i32* [[SIZE_CASTED]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIZE_CASTED]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP]], align 4 -// CHECK2-NEXT: [[TMP25:%.*]] = load i32*, i32** [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP20]], i32 [[TMP21]], i32 [[TMP23]], i32* [[TMP24]], i32* [[TMP25]]) +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP25]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP26]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP28]], i32* [[TMP27]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP30]], i32* [[TMP29]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[SIZE]], align 4 +// CHECK2-NEXT: store i32 [[TMP32]], i32* [[TMP31]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32*, i32** [[TMP]], align 4 +// CHECK2-NEXT: store i32* [[TMP34]], i32** [[TMP33]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32*, i32** [[TMP4]], align 4 +// CHECK2-NEXT: store i32* [[TMP36]], i32** [[TMP35]], align 4 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP29]]) +// CHECK2-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP40]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: -// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP31:%.*]] = bitcast i32* [[OUTPUT1]] to i8* -// CHECK2-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 4 -// CHECK2-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 -// CHECK2-NEXT: [[TMP34:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK2-NEXT: [[TMP35:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP33]], i32 1, i32 4, i8* [[TMP34]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK2-NEXT: switch i32 [[TMP35]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK2-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP42:%.*]] = bitcast i32* [[OUTPUT]] to i8* +// CHECK2-NEXT: store i8* [[TMP42]], i8** [[TMP41]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK2-NEXT: [[TMP46:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP44]], i32 1, i32 4, i8* [[TMP45]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK2-NEXT: switch i32 [[TMP46]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK2-NEXT: ] // CHECK2: .omp.reduction.case1: -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[OUTPUT1]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP33]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[OUTPUT]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP47]], [[TMP48]] +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP44]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.case2: -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[OUTPUT1]], align 4 -// CHECK2-NEXT: [[TMP39:%.*]] = atomicrmw add i32* [[ARRAYIDX]], i32 [[TMP38]] monotonic, align 4 +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, i32* [[OUTPUT]], align 4 +// CHECK2-NEXT: [[TMP50:%.*]] = atomicrmw add i32* [[ARRAYIDX]], i32 [[TMP49]] monotonic, align 4 // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.default: // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[SIZE:%.*]], i32* noundef [[OUTPUT:%.*]], i32* noundef [[INPUT:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[SIZE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1191,121 +1229,129 @@ // CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[OUTPUT3:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[_TMP4:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP3:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32* [[OUTPUT]], i32** [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: store i32* [[INPUT]], i32** [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK2-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], i32* [[SIZE]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[SIZE]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 0 -// CHECK2-NEXT: store i32 0, i32* [[OUTPUT3]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = ptrtoint i32* [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = sub i64 [[TMP8]], [[TMP9]] -// CHECK2-NEXT: [[TMP11:%.*]] = sdiv exact i64 [[TMP10]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64) -// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[OUTPUT3]], i64 [[TMP11]] -// CHECK2-NEXT: store i32* [[TMP12]], i32** [[_TMP4]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -// CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2-NEXT: [[TMP15:%.*]] = load i32*, i32** [[TMP7]], align 4 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP15]], i32 0 +// CHECK2-NEXT: store i32 0, i32* [[OUTPUT]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32*, i32** [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = ptrtoint i32* [[TMP16]] to i64 +// CHECK2-NEXT: [[TMP18:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64 +// CHECK2-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64) +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[OUTPUT]], i64 [[TMP20]] +// CHECK2-NEXT: store i32* [[TMP21]], i32** [[_TMP3]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP23]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32*, i32** [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 [[TMP24]] -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32*, i32** [[_TMP4]], align 4 -// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i32 0 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4 -// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP27]], [[TMP25]] -// CHECK2-NEXT: store i32 [[ADD10]], i32* [[ARRAYIDX9]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32*, i32** [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[TMP32]], i32 [[TMP33]] +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32*, i32** [[_TMP3]], align 4 +// CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP35]], i32 0 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP36]], [[TMP34]] +// CHECK2-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX8]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK2-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP37]], 1 +// CHECK2-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP32:%.*]] = bitcast i32* [[OUTPUT3]] to i8* -// CHECK2-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK2-NEXT: [[TMP35:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK2-NEXT: [[TMP36:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP34]], i32 1, i32 4, i8* [[TMP35]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK2-NEXT: switch i32 [[TMP36]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK2-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) +// CHECK2-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP41:%.*]] = bitcast i32* [[OUTPUT]] to i8* +// CHECK2-NEXT: store i8* [[TMP41]], i8** [[TMP40]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK2-NEXT: [[TMP45:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP43]], i32 1, i32 4, i8* [[TMP44]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK2-NEXT: switch i32 [[TMP45]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK2-NEXT: ] // CHECK2: .omp.reduction.case1: -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[OUTPUT3]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] -// CHECK2-NEXT: store i32 [[ADD12]], i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP34]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, i32* [[OUTPUT]], align 4 +// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP46]], [[TMP47]] +// CHECK2-NEXT: store i32 [[ADD11]], i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP43]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.case2: -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[OUTPUT3]], align 4 -// CHECK2-NEXT: [[TMP40:%.*]] = atomicrmw add i32* [[ARRAYIDX]], i32 [[TMP39]] monotonic, align 4 +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[OUTPUT]], align 4 +// CHECK2-NEXT: [[TMP49:%.*]] = atomicrmw add i32* [[ARRAYIDX]], i32 [[TMP48]] monotonic, align 4 // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.default: // CHECK2-NEXT: br label [[OMP_PRECOND_END]] @@ -1367,193 +1413,213 @@ // CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK2-NEXT: store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4 // CHECK2-NEXT: store i32* [[OUTPUT]], i32** [[OUTPUT_ADDR]], align 4 // CHECK2-NEXT: store i32* [[INPUT]], i32** [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP0]], i32* [[SIZE_CASTED]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[SIZE_CASTED]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP1]], i32* [[TMP2]], i32* [[TMP3]]) +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4 +// CHECK2-NEXT: store i32* [[TMP3]], i32** [[TMP2]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[INPUT_ADDR]], align 4 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[TMP4]], align 4 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[SIZE:%.*]], i32* noundef [[OUTPUT:%.*]], i32* noundef [[INPUT:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[OUTPUT2:%.*]] = alloca [3 x i32], align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK2-NEXT: [[SIZE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OUTPUT:%.*]] = alloca [3 x i32], align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I6:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32* [[OUTPUT]], i32** [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: store i32* [[INPUT]], i32** [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 0 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 2 -// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* [[OUTPUT2]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[ARRAY_BEGIN]], i32 3 -// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[ARRAY_BEGIN]], [[TMP2]] +// CHECK2-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[SIZE]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 2 +// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* [[OUTPUT]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[ARRAY_BEGIN]], i32 3 +// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[ARRAY_BEGIN]], [[TMP7]] // CHECK2-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK2: omp.arrayinit.body: // CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i32* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK2-NEXT: store i32 0, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] // CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK2: omp.arrayinit.done: -// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[TMP3]] to i64 -// CHECK2-NEXT: [[TMP5:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64 -// CHECK2-NEXT: [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]] -// CHECK2-NEXT: [[TMP7:%.*]] = sdiv exact i64 [[TMP6]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64) -// CHECK2-NEXT: [[TMP8:%.*]] = bitcast [3 x i32]* [[OUTPUT2]] to i32* -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr i32, i32* [[TMP8]], i64 [[TMP7]] -// CHECK2-NEXT: store i32* [[TMP9]], i32** [[TMP]], align 4 -// CHECK2-NEXT: [[RHS_BEGIN:%.*]] = bitcast [3 x i32]* [[OUTPUT2]] to i32* -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = ptrtoint i32* [[TMP8]] to i64 +// CHECK2-NEXT: [[TMP10:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64 +// CHECK2-NEXT: [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]] +// CHECK2-NEXT: [[TMP12:%.*]] = sdiv exact i64 [[TMP11]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64) +// CHECK2-NEXT: [[TMP13:%.*]] = bitcast [3 x i32]* [[OUTPUT]] to i32* +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[TMP13]], i64 [[TMP12]] +// CHECK2-NEXT: store i32* [[TMP14]], i32** [[TMP]], align 4 +// CHECK2-NEXT: [[RHS_BEGIN:%.*]] = bitcast [3 x i32]* [[OUTPUT]] to i32* +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[SIZE]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK2-NEXT: [[SUB5:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK2-NEXT: store i32 [[SUB5]], i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 // CHECK2-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK2-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] -// CHECK2-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] -// CHECK2-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP25]], i32* [[SIZE_CASTED]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[SIZE_CASTED]], align 4 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP]], align 4 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32*, i32** [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32 [[TMP23]], i32 [[TMP24]], i32 [[TMP26]], i32* [[TMP27]], i32* [[TMP28]]) +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP28]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP29]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP31]], i32* [[TMP30]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP33]], i32* [[TMP32]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[SIZE]], align 4 +// CHECK2-NEXT: store i32 [[TMP35]], i32* [[TMP34]], align 4 +// CHECK2-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32*, i32** [[TMP]], align 4 +// CHECK2-NEXT: store i32* [[TMP37]], i32** [[TMP36]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32*, i32** [[TMP4]], align 4 +// CHECK2-NEXT: store i32* [[TMP39]], i32** [[TMP38]], align 4 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK2-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP43]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: -// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP34:%.*]] = bitcast i32* [[RHS_BEGIN]] to i8* -// CHECK2-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 4 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 -// CHECK2-NEXT: [[TMP37:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK2-NEXT: [[TMP38:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP36]], i32 1, i32 4, i8* [[TMP37]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK2-NEXT: switch i32 [[TMP38]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK2-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP45:%.*]] = bitcast i32* [[RHS_BEGIN]] to i8* +// CHECK2-NEXT: store i8* [[TMP45]], i8** [[TMP44]], align 4 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 +// CHECK2-NEXT: [[TMP48:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK2-NEXT: [[TMP49:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP47]], i32 1, i32 4, i8* [[TMP48]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK2-NEXT: switch i32 [[TMP49]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK2-NEXT: ] // CHECK2: .omp.reduction.case1: -// CHECK2-NEXT: [[TMP39:%.*]] = getelementptr i32, i32* [[ARRAYIDX]], i32 3 -// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[ARRAYIDX]], [[TMP39]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK2-NEXT: [[TMP50:%.*]] = getelementptr i32, i32* [[ARRAYIDX]], i32 3 +// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[ARRAYIDX]], [[TMP50]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK2: omp.arraycpy.body: // CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST9:%.*]] = phi i32* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST9]], align 4 -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] -// CHECK2-NEXT: store i32 [[ADD10]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST9]], align 4 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi i32* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK2-NEXT: [[TMP51:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST8]], align 4 +// CHECK2-NEXT: [[TMP52:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP51]], [[TMP52]] +// CHECK2-NEXT: store i32 [[ADD9]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST8]], align 4 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT10]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 // CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE12:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP39]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK2: omp.arraycpy.done13: -// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP36]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT10]], [[TMP50]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] +// CHECK2: omp.arraycpy.done12: +// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP47]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.case2: -// CHECK2-NEXT: [[TMP42:%.*]] = getelementptr i32, i32* [[ARRAYIDX]], i32 3 -// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY14:%.*]] = icmp eq i32* [[ARRAYIDX]], [[TMP42]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY14]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY15:%.*]] -// CHECK2: omp.arraycpy.body15: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST16:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY15]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST17:%.*]] = phi i32* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY15]] ] -// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST16]], align 4 -// CHECK2-NEXT: [[TMP44:%.*]] = atomicrmw add i32* [[OMP_ARRAYCPY_DESTELEMENTPAST17]], i32 [[TMP43]] monotonic, align 4 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT18]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST17]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT19]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST16]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT18]], [[TMP42]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY15]] -// CHECK2: omp.arraycpy.done21: +// CHECK2-NEXT: [[TMP53:%.*]] = getelementptr i32, i32* [[ARRAYIDX]], i32 3 +// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY13:%.*]] = icmp eq i32* [[ARRAYIDX]], [[TMP53]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY13]], label [[OMP_ARRAYCPY_DONE20:%.*]], label [[OMP_ARRAYCPY_BODY14:%.*]] +// CHECK2: omp.arraycpy.body14: +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST15:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY14]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi i32* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY14]] ] +// CHECK2-NEXT: [[TMP54:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST15]], align 4 +// CHECK2-NEXT: [[TMP55:%.*]] = atomicrmw add i32* [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 [[TMP54]] monotonic, align 4 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT18]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST15]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE19:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP53]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_BODY14]] +// CHECK2: omp.arraycpy.done20: // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.default: // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[SIZE:%.*]], i32* noundef [[OUTPUT:%.*]], i32* noundef [[INPUT:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[SIZE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1563,157 +1629,165 @@ // CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[OUTPUT4:%.*]] = alloca [3 x i32], align 4 -// CHECK2-NEXT: [[_TMP5:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OUTPUT:%.*]] = alloca [3 x i32], align 4 +// CHECK2-NEXT: [[_TMP4:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32* [[OUTPUT]], i32** [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: store i32* [[INPUT]], i32** [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK2-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], i32* [[SIZE]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[SIZE]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 0 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[TMP7]], i32 2 -// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* [[OUTPUT4]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[ARRAY_BEGIN]], i32 3 -// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[ARRAY_BEGIN]], [[TMP8]] +// CHECK2-NEXT: [[TMP15:%.*]] = load i32*, i32** [[TMP7]], align 4 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP15]], i32 0 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32*, i32** [[TMP7]], align 4 +// CHECK2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[TMP16]], i32 2 +// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* [[OUTPUT]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr i32, i32* [[ARRAY_BEGIN]], i32 3 +// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[ARRAY_BEGIN]], [[TMP17]] // CHECK2-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK2: omp.arrayinit.body: // CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i32* [ [[ARRAY_BEGIN]], [[OMP_PRECOND_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK2-NEXT: store i32 0, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK2: omp.arrayinit.done: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = ptrtoint i32* [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64 -// CHECK2-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]] -// CHECK2-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64) -// CHECK2-NEXT: [[TMP14:%.*]] = bitcast [3 x i32]* [[OUTPUT4]] to i32* -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr i32, i32* [[TMP14]], i64 [[TMP13]] -// CHECK2-NEXT: store i32* [[TMP15]], i32** [[_TMP5]], align 4 -// CHECK2-NEXT: [[RHS_BEGIN:%.*]] = bitcast [3 x i32]* [[OUTPUT4]] to i32* -// CHECK2-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] -// CHECK2-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = ptrtoint i32* [[TMP18]] to i64 +// CHECK2-NEXT: [[TMP20:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64 +// CHECK2-NEXT: [[TMP21:%.*]] = sub i64 [[TMP19]], [[TMP20]] +// CHECK2-NEXT: [[TMP22:%.*]] = sdiv exact i64 [[TMP21]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64) +// CHECK2-NEXT: [[TMP23:%.*]] = bitcast [3 x i32]* [[OUTPUT]] to i32* +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr i32, i32* [[TMP23]], i64 [[TMP22]] +// CHECK2-NEXT: store i32* [[TMP24]], i32** [[_TMP4]], align 4 +// CHECK2-NEXT: [[RHS_BEGIN:%.*]] = bitcast [3 x i32]* [[OUTPUT]] to i32* +// CHECK2-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE]] ], [ [[TMP30]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] -// CHECK2-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP32]], [[TMP33]] +// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP34]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32*, i32** [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i32 [[TMP27]] -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4 -// CHECK2-NEXT: [[TMP29:%.*]] = load i32*, i32** [[_TMP5]], align 4 -// CHECK2-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i32 0 -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[ARRAYIDX10]], align 4 -// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP30]], [[TMP28]] -// CHECK2-NEXT: store i32 [[ADD11]], i32* [[ARRAYIDX10]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32*, i32** [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP35]], i32 [[TMP36]] +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32*, i32** [[_TMP4]], align 4 +// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[TMP38]], i32 0 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4 +// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP39]], [[TMP37]] +// CHECK2-NEXT: store i32 [[ADD10]], i32* [[ARRAYIDX9]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK2-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK2-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) -// CHECK2-NEXT: [[TMP34:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP35:%.*]] = bitcast i32* [[RHS_BEGIN]] to i8* -// CHECK2-NEXT: store i8* [[TMP35]], i8** [[TMP34]], align 4 -// CHECK2-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP36]], align 4 -// CHECK2-NEXT: [[TMP38:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK2-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP37]], i32 1, i32 4, i8* [[TMP38]], void (i8*, i8*)* @.omp.reduction.reduction_func.5, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK2-NEXT: switch i32 [[TMP39]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK2-NEXT: [[TMP41:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[TMP41]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP42]]) +// CHECK2-NEXT: [[TMP43:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP44:%.*]] = bitcast i32* [[RHS_BEGIN]] to i8* +// CHECK2-NEXT: store i8* [[TMP44]], i8** [[TMP43]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK2-NEXT: [[TMP48:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP46]], i32 1, i32 4, i8* [[TMP47]], void (i8*, i8*)* @.omp.reduction.reduction_func.5, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK2-NEXT: switch i32 [[TMP48]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK2-NEXT: ] // CHECK2: .omp.reduction.case1: -// CHECK2-NEXT: [[TMP40:%.*]] = getelementptr i32, i32* [[ARRAYIDX]], i32 3 -// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[ARRAYIDX]], [[TMP40]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK2-NEXT: [[TMP49:%.*]] = getelementptr i32, i32* [[ARRAYIDX]], i32 3 +// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[ARRAYIDX]], [[TMP49]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE16:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK2: omp.arraycpy.body: // CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST13:%.*]] = phi i32* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST13]], align 4 -// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] -// CHECK2-NEXT: store i32 [[ADD14]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST13]], align 4 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST13]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST12:%.*]] = phi i32* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT14:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK2-NEXT: [[TMP50:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 4 +// CHECK2-NEXT: [[TMP51:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP50]], [[TMP51]] +// CHECK2-NEXT: store i32 [[ADD13]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 4 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT14]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST12]], i32 1 // CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP40]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY]] -// CHECK2: omp.arraycpy.done17: -// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP37]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE15:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT14]], [[TMP49]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE15]], label [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_BODY]] +// CHECK2: omp.arraycpy.done16: +// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP46]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.case2: -// CHECK2-NEXT: [[TMP43:%.*]] = getelementptr i32, i32* [[ARRAYIDX]], i32 3 -// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY18:%.*]] = icmp eq i32* [[ARRAYIDX]], [[TMP43]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY18]], label [[OMP_ARRAYCPY_DONE25:%.*]], label [[OMP_ARRAYCPY_BODY19:%.*]] -// CHECK2: omp.arraycpy.body19: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST20:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY19]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST21:%.*]] = phi i32* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT22:%.*]], [[OMP_ARRAYCPY_BODY19]] ] -// CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 4 -// CHECK2-NEXT: [[TMP45:%.*]] = atomicrmw add i32* [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 [[TMP44]] monotonic, align 4 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT22]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT23]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST20]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE24:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT22]], [[TMP43]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE24]], label [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_BODY19]] -// CHECK2: omp.arraycpy.done25: +// CHECK2-NEXT: [[TMP52:%.*]] = getelementptr i32, i32* [[ARRAYIDX]], i32 3 +// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY17:%.*]] = icmp eq i32* [[ARRAYIDX]], [[TMP52]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY17]], label [[OMP_ARRAYCPY_DONE24:%.*]], label [[OMP_ARRAYCPY_BODY18:%.*]] +// CHECK2: omp.arraycpy.body18: +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST19:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT22:%.*]], [[OMP_ARRAYCPY_BODY18]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST20:%.*]] = phi i32* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT21:%.*]], [[OMP_ARRAYCPY_BODY18]] ] +// CHECK2-NEXT: [[TMP53:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST19]], align 4 +// CHECK2-NEXT: [[TMP54:%.*]] = atomicrmw add i32* [[OMP_ARRAYCPY_DESTELEMENTPAST20]], i32 [[TMP53]] monotonic, align 4 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT21]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST20]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT22]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST19]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE23:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT21]], [[TMP52]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE23]], label [[OMP_ARRAYCPY_DONE24]], label [[OMP_ARRAYCPY_BODY18]] +// CHECK2: omp.arraycpy.done24: // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.default: // CHECK2-NEXT: br label [[OMP_PRECOND_END]] @@ -1796,112 +1870,118 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK2-NEXT: store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4 // CHECK2-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[SIZE_CASTED]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[SIZE_CASTED]], align 4 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP2]], [10 x i32]* [[TMP0]]) +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP3]], align 4 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[SIZE:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK2-NEXT: [[A2:%.*]] = alloca [2 x i32], align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK2-NEXT: [[SIZE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[A:%.*]] = alloca [2 x i32], align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 1 -// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A2]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[ARRAY_BEGIN]], i32 2 -// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[ARRAY_BEGIN]], [[TMP1]] +// CHECK2-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[SIZE]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP3]], align 4 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr i32, i32* [[ARRAY_BEGIN]], i32 2 +// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[ARRAY_BEGIN]], [[TMP5]] // CHECK2-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK2: omp.arrayinit.body: // CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i32* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK2-NEXT: store i32 0, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP1]] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] // CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK2: omp.arrayinit.done: -// CHECK2-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i32* -// CHECK2-NEXT: [[TMP3:%.*]] = ptrtoint i32* [[TMP2]] to i64 -// CHECK2-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64 -// CHECK2-NEXT: [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]] -// CHECK2-NEXT: [[TMP6:%.*]] = sdiv exact i64 [[TMP5]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64) -// CHECK2-NEXT: [[TMP7:%.*]] = bitcast [2 x i32]* [[A2]] to i32* -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP7]], i64 [[TMP6]] -// CHECK2-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to [10 x i32]* -// CHECK2-NEXT: [[RHS_BEGIN:%.*]] = bitcast [2 x i32]* [[A2]] to i32* +// CHECK2-NEXT: [[TMP6:%.*]] = bitcast [10 x i32]* [[TMP4]] to i32* +// CHECK2-NEXT: [[TMP7:%.*]] = ptrtoint i32* [[TMP6]] to i64 +// CHECK2-NEXT: [[TMP8:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64 +// CHECK2-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64) +// CHECK2-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[A]] to i32* +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP11]], i64 [[TMP10]] +// CHECK2-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to [10 x i32]* +// CHECK2-NEXT: [[RHS_BEGIN:%.*]] = bitcast [2 x i32]* [[A]] to i32* // CHECK2-NEXT: store i32 0, i32* [[I]], align 4 // CHECK2-NEXT: br label [[FOR_COND:%.*]] // CHECK2: for.cond: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[SIZE]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] // CHECK2: for.body: // CHECK2-NEXT: br label [[FOR_INC:%.*]] // CHECK2: for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK2-NEXT: store i32 [[INC]], i32* [[I]], align 4 // CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK2: for.end: -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP14:%.*]] = bitcast i32* [[RHS_BEGIN]] to i8* -// CHECK2-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK2-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP16]], i32 1, i32 4, i8* [[TMP17]], void (i8*, i8*)* @.omp.reduction.reduction_func.10, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK2-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP18:%.*]] = bitcast i32* [[RHS_BEGIN]] to i8* +// CHECK2-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK2-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP20]], i32 1, i32 4, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func.10, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK2-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK2-NEXT: ] // CHECK2: .omp.reduction.case1: -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[ARRAYIDX]], i32 2 -// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[ARRAYIDX]], [[TMP19]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr i32, i32* [[ARRAYIDX]], i32 2 +// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[ARRAYIDX]], [[TMP23]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK2: omp.arraycpy.body: // CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi i32* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST2:%.*]] = phi i32* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT3:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST2]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST2]], align 4 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT3]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST2]], i32 1 // CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP19]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK2: omp.arraycpy.done6: -// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP16]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE4:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT3]], [[TMP23]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] +// CHECK2: omp.arraycpy.done5: +// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP20]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.case2: -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr i32, i32* [[ARRAYIDX]], i32 2 -// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq i32* [[ARRAYIDX]], [[TMP22]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]] -// CHECK2: omp.arraycpy.body8: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi i32* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = atomicrmw add i32* [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP23]] monotonic, align 4 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP22]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]] -// CHECK2: omp.arraycpy.done14: +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr i32, i32* [[ARRAYIDX]], i32 2 +// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY6:%.*]] = icmp eq i32* [[ARRAYIDX]], [[TMP26]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY6]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY7:%.*]] +// CHECK2: omp.arraycpy.body7: +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST8:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY7]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST9:%.*]] = phi i32* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY7]] ] +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST8]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = atomicrmw add i32* [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i32 [[TMP27]] monotonic, align 4 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT10]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT11]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST8]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE12:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT10]], [[TMP26]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY7]] +// CHECK2: omp.arraycpy.done13: // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.default: // CHECK2-NEXT: ret void @@ -1947,77 +2027,83 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK2-NEXT: store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4 // CHECK2-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[SIZE_CASTED]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[SIZE_CASTED]], align 4 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32 [[TMP2]], [10 x i32]* [[TMP0]]) +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP3]], align 4 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[SIZE:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK2-NEXT: [[A1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK2-NEXT: [[SIZE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 3 -// CHECK2-NEXT: store i32 0, i32* [[A1]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[TMP0]] to i32* -// CHECK2-NEXT: [[TMP2:%.*]] = ptrtoint i32* [[TMP1]] to i64 -// CHECK2-NEXT: [[TMP3:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64 -// CHECK2-NEXT: [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]] -// CHECK2-NEXT: [[TMP5:%.*]] = sdiv exact i64 [[TMP4]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64) -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr i32, i32* [[A1]], i64 [[TMP5]] -// CHECK2-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to [10 x i32]* +// CHECK2-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[SIZE]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP3]], align 4 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i32 0, i32 3 +// CHECK2-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = bitcast [10 x i32]* [[TMP4]] to i32* +// CHECK2-NEXT: [[TMP6:%.*]] = ptrtoint i32* [[TMP5]] to i64 +// CHECK2-NEXT: [[TMP7:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64 +// CHECK2-NEXT: [[TMP8:%.*]] = sub i64 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP9:%.*]] = sdiv exact i64 [[TMP8]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64) +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP9]] +// CHECK2-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to [10 x i32]* // CHECK2-NEXT: store i32 0, i32* [[I]], align 4 // CHECK2-NEXT: br label [[FOR_COND:%.*]] // CHECK2: for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[SIZE]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP12]], [[TMP13]] // CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] // CHECK2: for.body: // CHECK2-NEXT: br label [[FOR_INC:%.*]] // CHECK2: for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK2-NEXT: store i32 [[INC]], i32* [[I]], align 4 // CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK2: for.end: -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP12:%.*]] = bitcast i32* [[A1]] to i8* -// CHECK2-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK2-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], i32 1, i32 4, i8* [[TMP15]], void (i8*, i8*)* @.omp.reduction.reduction_func.14, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK2-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP16:%.*]] = bitcast i32* [[A]] to i8* +// CHECK2-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK2-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP18]], i32 1, i32 4, i8* [[TMP19]], void (i8*, i8*)* @.omp.reduction.reduction_func.14, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK2-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK2-NEXT: ] // CHECK2: .omp.reduction.case1: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP18]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.case2: -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = atomicrmw add i32* [[ARRAYIDX]], i32 [[TMP19]] monotonic, align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = atomicrmw add i32* [[ARRAYIDX]], i32 [[TMP23]] monotonic, align 4 // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.default: // CHECK2-NEXT: ret void diff --git a/clang/test/OpenMP/sections_firstprivate_codegen.cpp b/clang/test/OpenMP/sections_firstprivate_codegen.cpp --- a/clang/test/OpenMP/sections_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/sections_firstprivate_codegen.cpp @@ -400,6 +400,7 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* @@ -409,22 +410,30 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[T_VAR]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[VAR]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[TMP4]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP2]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP6]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2StC2Ev @@ -493,120 +502,119 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR6:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR6:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S.0]* [[TMP2]] to %struct.S.0* -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP10]], i8* align 4 [[TMP11]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast [2 x %struct.S.0]* [[TMP6]] to %struct.S.0* +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP13]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP12]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP3]], %struct.St* noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp slt i32 [[TMP11]], 1 -// CHECK1-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 1 -// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP13]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP8]], %struct.St* noundef [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp slt i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32 1 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP22]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] -// CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE7:%.*]] +// CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE3:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 0 +// CHECK1-NEXT: store i32 [[TMP23]], i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK1: .omp.sections.case7: -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP19:%.*]] = bitcast %struct.S.0* [[ARRAYIDX8]] to i8* -// CHECK1-NEXT: [[TMP20:%.*]] = bitcast %struct.S.0* [[VAR5]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i64 4, i1 false) +// CHECK1: .omp.sections.case3: +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP24:%.*]] = bitcast %struct.S.0* [[ARRAYIDX4]] to i8* +// CHECK1-NEXT: [[TMP25:%.*]] = bitcast %struct.S.0* [[VAR]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP24]], i8* align 4 [[TMP25]], i64 4, i1 false) // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP26]], 1 // CHECK1-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN9]], i64 2 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN5]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP24]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP29]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done10: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP26]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE6:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done6: +// CHECK1-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP31]]) // CHECK1-NEXT: ret void // // @@ -824,73 +832,75 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK3-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[G:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load volatile i32, i32* @g, align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[G]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP5]], 1 -// CHECK3-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i32 [[TMP5]], i32 1 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load volatile i32, i32* @g, align 4 +// CHECK3-NEXT: store i32 [[TMP3]], i32* [[G]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = icmp slt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP7]], i32 1 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: switch i32 [[TMP11]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK3-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] -// CHECK3-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] +// CHECK3-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.sections.case: // CHECK3-NEXT: store i32 1, i32* [[G]], align 4 -// CHECK3-NEXT: store i32 10, i32* [[SIVAR1]], align 4 +// CHECK3-NEXT: store i32 10, i32* [[SIVAR]], align 4 // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK3: .omp.sections.case2: -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store i32* [[G]], i32** [[TMP12]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store i32* [[SIVAR1]], i32** [[TMP13]], align 8 +// CHECK3: .omp.sections.case1: +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[G]], i32** [[TMP14]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[TMP15]], align 8 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK3: .omp.sections.exit: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK3-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]]) // CHECK3-NEXT: ret void // // @@ -1050,65 +1060,70 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>*, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK4-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* // CHECK4-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>** [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* @_ZZ4mainE5sivar) +// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store i32* @_ZZ4mainE5sivar, i32** [[TMP0]], align 8 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK4-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[G:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>, align 8 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 +// CHECK4-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load volatile i32, i32* @g, align 4 -// CHECK4-NEXT: store i32 [[TMP1]], i32* [[G]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK4-NEXT: store i32 [[TMP2]], i32* [[SIVAR1]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP5]], 1 -// CHECK4-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i32 [[TMP5]], i32 1 -// CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK4-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load volatile i32, i32* @g, align 4 +// CHECK4-NEXT: store i32 [[TMP3]], i32* [[G]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK4-NEXT: store i32 [[TMP4]], i32* [[SIVAR]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = icmp slt i32 [[TMP7]], 1 +// CHECK4-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP7]], i32 1 +// CHECK4-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK4-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: switch i32 [[TMP11]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: switch i32 [[TMP13]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK4-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] -// CHECK4-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] +// CHECK4-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK4-NEXT: ] // CHECK4: .omp.sections.case: // CHECK4-NEXT: store i32 1, i32* [[G]], align 4 -// CHECK4-NEXT: store i32 10, i32* [[SIVAR1]], align 4 +// CHECK4-NEXT: store i32 10, i32* [[SIVAR]], align 4 // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK4: .omp.sections.case2: +// CHECK4: .omp.sections.case1: // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>* [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** [[BLOCK_ISA]], align 8 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>* [[BLOCK]], i32 0, i32 1 @@ -1120,29 +1135,29 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>* [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>* [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP12:%.*]] = load volatile i32, i32* [[G]], align 4 -// CHECK4-NEXT: store volatile i32 [[TMP12]], i32* [[BLOCK_CAPTURED]], align 8 -// CHECK4-NEXT: [[BLOCK_CAPTURED3:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>* [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK4-NEXT: store i32 [[TMP13]], i32* [[BLOCK_CAPTURED3]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>* [[BLOCK]] to void ()* -// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP14]] to %struct.__block_literal_generic* -// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP16:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* -// CHECK4-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP15]], align 8 -// CHECK4-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to void (i8*)* -// CHECK4-NEXT: call void [[TMP18]](i8* noundef [[TMP16]]) +// CHECK4-NEXT: [[TMP14:%.*]] = load volatile i32, i32* [[G]], align 4 +// CHECK4-NEXT: store volatile i32 [[TMP14]], i32* [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[BLOCK_CAPTURED2:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>* [[BLOCK]], i32 0, i32 6 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP15]], i32* [[BLOCK_CAPTURED2]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>* [[BLOCK]] to void ()* +// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP16]] to %struct.__block_literal_generic* +// CHECK4-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP18:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* +// CHECK4-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP17]], align 8 +// CHECK4-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to void (i8*)* +// CHECK4-NEXT: call void [[TMP20]](i8* noundef [[TMP18]]) // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK4: .omp.sections.exit: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK4-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]]) // CHECK4-NEXT: ret void // // diff --git a/clang/test/OpenMP/sections_lastprivate_codegen.cpp b/clang/test/OpenMP/sections_lastprivate_codegen.cpp --- a/clang/test/OpenMP/sections_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/sections_lastprivate_codegen.cpp @@ -197,6 +197,8 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 @@ -207,24 +209,34 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00) // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], float noundef 3.000000e+00) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[T_VAR]], [2 x i32]* [[VEC]], [2 x %struct.S]* [[S_ARR]], %struct.S* [[VAR]], i32* @_ZZ4mainE5sivar) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store i32* @_ZZ4mainE5sivar, i32** [[TMP5]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK1-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done1: +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done2: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP2]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP7]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ -251,42 +263,40 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[SIVAR5:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: @@ -296,88 +306,88 @@ // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = icmp slt i32 [[TMP7]], 0 -// CHECK1-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP7]], i32 0 -// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = icmp slt i32 [[TMP13]], 0 +// CHECK1-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 0 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP13]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i64 0, i64 0 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX6]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: store i32 31, i32* [[SIVAR5]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 0 +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX1]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: store i32 31, i32* [[SIVAR]], align 4 // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK1-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IL_]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK1-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK1-NEXT: [[TMP22:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast [2 x %struct.S]* [[S_ARR3]] to %struct.S* -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN7]], [[TMP24]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK1-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP27]], i8* align 4 [[TMP28]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP29:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN2]], [[TMP30]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP23]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[CALL8:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP29]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN2]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[CALL3:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done9: -// CHECK1-NEXT: [[CALL10:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP3]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[SIVAR5]], align 4 -// CHECK1-NEXT: store i32 [[TMP25]], i32* [[TMP4]], align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP30]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done4: +// CHECK1-NEXT: [[CALL5:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP8]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP31]], i32* [[TMP10]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN11]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP26]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP32]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done12: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP28]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done7: +// CHECK1-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP34]]) // CHECK1-NEXT: ret void // // @@ -392,10 +402,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -404,34 +415,36 @@ // CHECK1-NEXT: [[X:%.*]] = alloca double, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1 -// CHECK1-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 1 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP9:%.*]] = load double, double* [[X]], align 8 -// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP9]], 1.000000e+00 +// CHECK1-NEXT: [[TMP10:%.*]] = load double, double* [[X]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP10]], 1.000000e+00 // CHECK1-NEXT: store double [[INC]], double* [[X]], align 8 // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK1: .omp.sections.case1: @@ -439,21 +452,21 @@ // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK1-NEXT: store i32 [[INC2]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IL_]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP13:%.*]] = load double, double* [[X]], align 8 -// CHECK1-NEXT: store double [[TMP13]], double* @_ZN1A1xE, align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load double, double* [[X]], align 8 +// CHECK1-NEXT: store double [[TMP14]], double* @_ZN1A1xE, align 8 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // @@ -461,36 +474,45 @@ // CHECK1-SAME: () #[[ATTR7:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[T_VAR]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[VAR]]) +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[TMP4]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP2]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP6]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev @@ -528,196 +550,195 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: -// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = icmp slt i32 [[TMP6]], 1 -// CHECK1-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 1 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = icmp slt i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 1 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP12]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] -// CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE5:%.*]] +// CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP13]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 0 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK1: .omp.sections.case5: -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i64 0, i64 0 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEaSERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX6]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) +// CHECK1: .omp.sections.case1: +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.1* @_ZN1SIiEaSERKS0_(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX2]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]]) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IL_]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK1-NEXT: [[TMP21:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP22:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR3]] to %struct.S.0* -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN7]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN7]], [[TMP23]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP24]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP25]], i8* align 4 [[TMP26]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN3:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP27:%.*]] = bitcast [2 x %struct.S.1]* [[S_ARR]] to %struct.S.1* +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN3]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN3]], [[TMP28]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP22]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[CALL8:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEaSERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP23]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done9: -// CHECK1-NEXT: [[CALL10:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEaSERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP3]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP27]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN3]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[CALL4:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.1* @_ZN1SIiEaSERKS0_(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP28]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done5: +// CHECK1-NEXT: [[CALL6:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.1* @_ZN1SIiEaSERKS0_(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP8]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN11]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN7]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP24]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done12: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]]) +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done8: +// CHECK1-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP31]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR5]] +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR5]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, i32* [[F]], align 4 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -734,79 +755,81 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK3-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[G:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1 -// CHECK3-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 1 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP5]], 1 +// CHECK3-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i32 [[TMP5]], i32 1 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: switch i32 [[TMP11]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK3-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] -// CHECK3-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] +// CHECK3-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.sections.case: // CHECK3-NEXT: store i32 1, i32* [[G]], align 4 -// CHECK3-NEXT: store i32 13, i32* [[SIVAR1]], align 4 +// CHECK3-NEXT: store i32 13, i32* [[SIVAR]], align 4 // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK3: .omp.sections.case2: -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store i32* [[G]], i32** [[TMP10]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store i32* [[SIVAR1]], i32** [[TMP11]], align 8 +// CHECK3: .omp.sections.case1: +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[G]], i32** [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[TMP13]], align 8 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK3: .omp.sections.exit: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK3-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IL_]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[G]], align 4 -// CHECK3-NEXT: store volatile i32 [[TMP15]], i32* @g, align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[G]], align 4 +// CHECK3-NEXT: store volatile i32 [[TMP17]], i32* @g, align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[TMP2]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -844,61 +867,66 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>*, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK4-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* // CHECK4-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>** [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* @_ZZ4mainE5sivar) +// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store i32* @_ZZ4mainE5sivar, i32** [[TMP0]], align 8 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK4-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[G:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>, align 8 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 +// CHECK4-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1 -// CHECK4-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 1 -// CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP5]], 1 +// CHECK4-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i32 [[TMP5]], i32 1 +// CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK4-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: switch i32 [[TMP11]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK4-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] -// CHECK4-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] +// CHECK4-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK4-NEXT: ] // CHECK4: .omp.sections.case: // CHECK4-NEXT: store i32 1, i32* [[G]], align 4 -// CHECK4-NEXT: store i32 17, i32* [[SIVAR1]], align 4 +// CHECK4-NEXT: store i32 17, i32* [[SIVAR]], align 4 // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK4: .omp.sections.case2: +// CHECK4: .omp.sections.case1: // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>* [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** [[BLOCK_ISA]], align 8 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>* [[BLOCK]], i32 0, i32 1 @@ -910,39 +938,39 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>* [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>* [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP10:%.*]] = load volatile i32, i32* [[G]], align 4 -// CHECK4-NEXT: store volatile i32 [[TMP10]], i32* [[BLOCK_CAPTURED]], align 8 -// CHECK4-NEXT: [[BLOCK_CAPTURED3:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>* [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK4-NEXT: store i32 [[TMP11]], i32* [[BLOCK_CAPTURED3]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>* [[BLOCK]] to void ()* -// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP12]] to %struct.__block_literal_generic* -// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP14:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* -// CHECK4-NEXT: [[TMP15:%.*]] = load i8*, i8** [[TMP13]], align 8 -// CHECK4-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to void (i8*)* -// CHECK4-NEXT: call void [[TMP16]](i8* noundef [[TMP14]]) +// CHECK4-NEXT: [[TMP12:%.*]] = load volatile i32, i32* [[G]], align 4 +// CHECK4-NEXT: store volatile i32 [[TMP12]], i32* [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[BLOCK_CAPTURED2:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>* [[BLOCK]], i32 0, i32 6 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP13]], i32* [[BLOCK_CAPTURED2]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>* [[BLOCK]] to void ()* +// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP14]] to %struct.__block_literal_generic* +// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP16:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* +// CHECK4-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP15]], align 8 +// CHECK4-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to void (i8*)* +// CHECK4-NEXT: call void [[TMP18]](i8* noundef [[TMP16]]) // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK4: .omp.sections.exit: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK4-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK4-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IL_]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK4-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK4: .omp.lastprivate.then: -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[G]], align 4 -// CHECK4-NEXT: store volatile i32 [[TMP20]], i32* @g, align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK4-NEXT: store i32 [[TMP21]], i32* [[TMP0]], align 4 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[G]], align 4 +// CHECK4-NEXT: store volatile i32 [[TMP22]], i32* @g, align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP23]], i32* [[TMP2]], align 4 // CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK4: .omp.lastprivate.done: -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]]) // CHECK4-NEXT: ret void // // @@ -970,6 +998,8 @@ // CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK5-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK5-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK5-NEXT: store i32 0, i32* [[T_VAR]], align 4 @@ -980,24 +1010,34 @@ // CHECK5-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYINIT_BEGIN]], i64 1 // CHECK5-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00) // CHECK5-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], float noundef 3.000000e+00) -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[T_VAR]], [2 x i32]* [[VEC]], [2 x %struct.S]* [[S_ARR]], %struct.S* [[VAR]], i32* @_ZZ4mainE5sivar) -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i32* [[T_VAR]], i32** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store %struct.S* [[VAR]], %struct.S** [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: store i32* @_ZZ4mainE5sivar, i32** [[TMP5]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]]) // CHECK5-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK5-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]] // CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done1: +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done2: // CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK5-NEXT: ret i32 [[TMP2]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK5-NEXT: ret i32 [[TMP7]] // // // CHECK5-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ -1024,42 +1064,40 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK5-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK5-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK5-NEXT: [[SIVAR5:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK5-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: @@ -1069,88 +1107,88 @@ // CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = icmp slt i32 [[TMP7]], 0 -// CHECK5-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP7]], i32 0 -// CHECK5-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK5-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = icmp slt i32 [[TMP13]], 0 +// CHECK5-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 0 +// CHECK5-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK5-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK5-NEXT: switch i32 [[TMP13]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: switch i32 [[TMP19]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK5-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.sections.case: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 0 -// CHECK5-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4 -// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i64 0, i64 0 -// CHECK5-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX6]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK5-NEXT: store i32 31, i32* [[SIVAR5]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 0 +// CHECK5-NEXT: store i32 [[TMP20]], i32* [[ARRAYIDX]], align 4 +// CHECK5-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 0 +// CHECK5-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX1]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: store i32 31, i32* [[SIVAR]], align 4 // CHECK5-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK5: .omp.sections.exit: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK5-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK5-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IL_]], align 4 +// CHECK5-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK5-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK5-NEXT: store i32 [[TMP20]], i32* [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP21:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK5-NEXT: [[TMP22:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i64 8, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP2]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP23:%.*]] = bitcast [2 x %struct.S]* [[S_ARR3]] to %struct.S* -// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN7]], [[TMP24]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK5-NEXT: store i32 [[TMP26]], i32* [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK5-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP27]], i8* align 4 [[TMP28]], i64 8, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP29:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK5-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN2]], [[TMP30]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP23]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[CALL8:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP29]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN2]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[CALL3:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done9: -// CHECK5-NEXT: [[CALL10:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP3]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[SIVAR5]], align 4 -// CHECK5-NEXT: store i32 [[TMP25]], i32* [[TMP4]], align 4 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP30]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done4: +// CHECK5-NEXT: [[CALL5:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP8]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: [[TMP31:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK5-NEXT: store i32 [[TMP31]], i32* [[TMP10]], align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK5-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN11]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK5-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP26]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP32]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done12: -// CHECK5-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP28]]) +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done7: +// CHECK5-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP34]]) // CHECK5-NEXT: ret void // // @@ -1165,10 +1203,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK5-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -1177,74 +1216,76 @@ // CHECK5-NEXT: [[X:%.*]] = alloca [[STRUCT_LASPRIVATE_CONDITIONAL:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], %struct.lasprivate.conditional* [[X]], i32 0, i32 1 -// CHECK5-NEXT: store i8 0, i8* [[TMP0]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], %struct.lasprivate.conditional* [[X]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1 -// CHECK5-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 1 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], %struct.lasprivate.conditional* [[X]], i32 0, i32 1 +// CHECK5-NEXT: store i8 0, i8* [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], %struct.lasprivate.conditional* [[X]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP5]], 1 +// CHECK5-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i32 [[TMP5]], i32 1 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK5-NEXT: switch i32 [[TMP10]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: switch i32 [[TMP11]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK5-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK5-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.sections.case: -// CHECK5-NEXT: [[TMP11:%.*]] = load double, double* [[TMP1]], align 8 -// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 -// CHECK5-NEXT: store double [[INC]], double* [[TMP1]], align 8 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK5-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], [8 x i32]* @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* @.{{pl_cond[.].+[.|,]}} align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp sle i32 [[TMP13]], [[TMP12]] -// CHECK5-NEXT: br i1 [[TMP14]], label [[LP_COND_THEN:%.*]], label [[LP_COND_EXIT:%.*]] +// CHECK5-NEXT: [[TMP12:%.*]] = load double, double* [[TMP2]], align 8 +// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP12]], 1.000000e+00 +// CHECK5-NEXT: store double [[INC]], double* [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* @.{{pl_cond[.].+[.|,]}} align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = icmp sle i32 [[TMP14]], [[TMP13]] +// CHECK5-NEXT: br i1 [[TMP15]], label [[LP_COND_THEN:%.*]], label [[LP_COND_EXIT:%.*]] // CHECK5: lp_cond_then: -// CHECK5-NEXT: store i32 [[TMP12]], i32* @.{{pl_cond[.].+[.|,]}} align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load double, double* [[TMP1]], align 8 -// CHECK5-NEXT: store double [[TMP15]], double* @{{pl_cond[.].+[.|,]}} align 8 +// CHECK5-NEXT: store i32 [[TMP13]], i32* @.{{pl_cond[.].+[.|,]}} align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load double, double* [[TMP2]], align 8 +// CHECK5-NEXT: store double [[TMP16]], double* @{{pl_cond[.].+[.|,]}} align 8 // CHECK5-NEXT: br label [[LP_COND_EXIT]] // CHECK5: lp_cond_exit: -// CHECK5-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], [8 x i32]* @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) +// CHECK5-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) // CHECK5-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK5: .omp.sections.case1: // CHECK5-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK5: .omp.sections.exit: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK5-NEXT: [[INC2:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: [[INC2:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK5-NEXT: store i32 [[INC2]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP3]]) -// CHECK5-NEXT: br i1 [[TMP18]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IL_]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP4]]) +// CHECK5-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP19:%.*]] = load double, double* @{{pl_cond[.].+[.|,]}} align 8 -// CHECK5-NEXT: store double [[TMP19]], double* [[TMP1]], align 8 -// CHECK5-NEXT: [[TMP20:%.*]] = load double, double* [[TMP1]], align 8 -// CHECK5-NEXT: store double [[TMP20]], double* @_ZN1A1xE, align 8 +// CHECK5-NEXT: [[TMP20:%.*]] = load double, double* @{{pl_cond[.].+[.|,]}} align 8 +// CHECK5-NEXT: store double [[TMP20]], double* [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP21:%.*]] = load double, double* [[TMP2]], align 8 +// CHECK5-NEXT: store double [[TMP21]], double* @_ZN1A1xE, align 8 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // @@ -1252,36 +1293,45 @@ // CHECK5-SAME: () #[[ATTR7:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK5-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 -// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1]], align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK5-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK5-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) -// CHECK5-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK5-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK5-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK5-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) -// CHECK5-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[T_VAR]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[VAR]]) +// CHECK5-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 0 +// CHECK5-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK5-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK5-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK5-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i32* [[T_VAR]], i32** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[TMP4]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK5: arraydestroy.done1: -// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK5-NEXT: ret i32 [[TMP2]] +// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK5-NEXT: ret i32 [[TMP6]] // // // CHECK5-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev @@ -1319,195 +1369,194 @@ // // // CHECK5-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK5-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK5-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK5-NEXT: entry: -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK5-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK5-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK5-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK5-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK5-NEXT: entry: -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK5-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK5-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK5-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK5-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK5-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK5-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP7]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: -// CHECK5-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK5-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i64 1 +// CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = icmp slt i32 [[TMP6]], 1 -// CHECK5-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 1 -// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK5-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = icmp slt i32 [[TMP11]], 1 +// CHECK5-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 1 +// CHECK5-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK5-NEXT: switch i32 [[TMP12]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: switch i32 [[TMP17]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK5-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] -// CHECK5-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE5:%.*]] +// CHECK5-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.sections.case: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 0 -// CHECK5-NEXT: store i32 [[TMP13]], i32* [[ARRAYIDX]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 0 +// CHECK5-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 // CHECK5-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK5: .omp.sections.case5: -// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i64 0, i64 0 -// CHECK5-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEaSERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX6]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) +// CHECK5: .omp.sections.case1: +// CHECK5-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 0 +// CHECK5-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.1* @_ZN1SIiEaSERKS0_(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX2]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK5-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK5: .omp.sections.exit: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK5-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]]) -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK5-NEXT: br i1 [[TMP18]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IL_]], align 4 +// CHECK5-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK5-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK5-NEXT: store i32 [[TMP19]], i32* [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP20:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK5-NEXT: [[TMP21:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 8, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP2]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP22:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR3]] to %struct.S.0* -// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN7]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN7]], [[TMP23]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK5-NEXT: store i32 [[TMP24]], i32* [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK5-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP25]], i8* align 4 [[TMP26]], i64 8, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN3:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP27:%.*]] = bitcast [2 x %struct.S.1]* [[S_ARR]] to %struct.S.1* +// CHECK5-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN3]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN3]], [[TMP28]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP22]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[CALL8:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEaSERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP23]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done9: -// CHECK5-NEXT: [[CALL10:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEaSERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP3]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP27]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN3]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[CALL4:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.1* @_ZN1SIiEaSERKS0_(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP28]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done5: +// CHECK5-NEXT: [[CALL6:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.1* @_ZN1SIiEaSERKS0_(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP8]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK5-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN11]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK5-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN7]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP24]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done12: -// CHECK5-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]]) +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done8: +// CHECK5-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 +// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP31]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK5-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK5-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK5-NEXT: entry: -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK5-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR5]] +// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK5-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR5]] // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK5-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK5-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK5-NEXT: entry: -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK5-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK5-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK5-NEXT: store i32 0, i32* [[F]], align 4 // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK5-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK5-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK5-NEXT: entry: -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK5-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK5-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK5-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK5-NEXT: entry: -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK5-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK5-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK5-NEXT: ret void // diff --git a/clang/test/OpenMP/sections_private_codegen.cpp b/clang/test/OpenMP/sections_private_codegen.cpp --- a/clang/test/OpenMP/sections_private_codegen.cpp +++ b/clang/test/OpenMP/sections_private_codegen.cpp @@ -113,6 +113,7 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 @@ -123,7 +124,7 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00) // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], float noundef 3.000000e+00) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK1-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4:[0-9]+]] @@ -166,10 +167,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -182,6 +184,8 @@ // CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 @@ -197,61 +201,61 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 0 -// CHECK1-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 0 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 0 +// CHECK1-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 0 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct.S* [[ARRAYIDX1]] to i8* -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP10]], i8* align 4 [[TMP11]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.S* [[ARRAYIDX1]] to i8* +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 4, i1 false) // CHECK1-NEXT: store i32 2, i32* [[SIVAR]], align 4 // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK1-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]]) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i64 2 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP15]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP16]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done3: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP18]]) // CHECK1-NEXT: ret void // // @@ -274,6 +278,7 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* @@ -283,7 +288,7 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 @@ -359,10 +364,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -374,6 +380,8 @@ // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 @@ -389,63 +397,63 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1 -// CHECK1-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 1 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK1: .omp.sections.case1: // CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct.S.0* [[ARRAYIDX2]] to i8* -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.S.0* [[VAR]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP10]], i8* align 4 [[TMP11]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.S.0* [[ARRAYIDX2]] to i8* +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast %struct.S.0* [[VAR]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 4, i1 false) // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK1-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]]) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAY_BEGIN3:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN3]], i64 2 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN3]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP15]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP16]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN3]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done4: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]]) // CHECK1-NEXT: ret void // // @@ -504,10 +512,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK3-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -518,28 +527,30 @@ // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1 -// CHECK3-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 1 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK3-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK3-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK3-NEXT: ] @@ -548,22 +559,22 @@ // CHECK3-NEXT: store i32 11, i32* [[SIVAR]], align 4 // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK3: .omp.sections.case1: -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double* [[G]], double** [[TMP9]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[TMP10]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G]], double** [[TMP10]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[TMP11]], align 8 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK3: .omp.sections.exit: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK3-NEXT: ret void // // @@ -583,18 +594,20 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>*, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* // CHECK4-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>** [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK4-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -605,28 +618,30 @@ // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, align 8 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK4-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1 -// CHECK4-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1 -// CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1 +// CHECK4-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 1 +// CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK4-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK4-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK4-NEXT: ] @@ -646,29 +661,29 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp.1 to %struct.__block_descriptor*), %struct.__block_descriptor** [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP9:%.*]] = load volatile double, double* [[G]], align 8 -// CHECK4-NEXT: store volatile double [[TMP9]], double* [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[TMP10:%.*]] = load volatile double, double* [[G]], align 8 +// CHECK4-NEXT: store volatile double [[TMP10]], double* [[BLOCK_CAPTURED]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED2:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[SIVAR]], align 4 -// CHECK4-NEXT: store i32 [[TMP10]], i32* [[BLOCK_CAPTURED2]], align 8 -// CHECK4-NEXT: [[TMP11:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK]] to void ()* -// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP11]] to %struct.__block_literal_generic* -// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP13:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* -// CHECK4-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP12]], align 8 -// CHECK4-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to void (i8*)* -// CHECK4-NEXT: call void [[TMP15]](i8* noundef [[TMP13]]) +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP11]], i32* [[BLOCK_CAPTURED2]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK]] to void ()* +// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP12]] to %struct.__block_literal_generic* +// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP14:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* +// CHECK4-NEXT: [[TMP15:%.*]] = load i8*, i8** [[TMP13]], align 8 +// CHECK4-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to void (i8*)* +// CHECK4-NEXT: call void [[TMP16]](i8* noundef [[TMP14]]) // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK4: .omp.sections.exit: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK4-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK4-NEXT: ret void // // diff --git a/clang/test/OpenMP/sections_reduction_codegen.cpp b/clang/test/OpenMP/sections_reduction_codegen.cpp --- a/clang/test/OpenMP/sections_reduction_codegen.cpp +++ b/clang/test/OpenMP/sections_reduction_codegen.cpp @@ -192,6 +192,7 @@ // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 4 // CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store float 0.000000e+00, float* [[T_VAR]], align 4 @@ -203,24 +204,36 @@ // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00) // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], float noundef 3.000000e+00) // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, %struct.S*, %struct.S*, float*, [2 x i32]*, [2 x %struct.S]*)* @.omp_outlined. to void (i32*, i32*, ...)*), float* [[T_VAR]], %struct.S* [[VAR]], %struct.S* [[VAR1]], float* [[T_VAR1]], [2 x i32]* [[VEC]], [2 x %struct.S]* [[S_ARR]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store float* [[T_VAR]], float** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.S* [[VAR1]], %struct.S** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[T_VAR1]], float** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP6]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK1-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP2]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP8]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ -247,217 +260,214 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR1:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[T_VAR1:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca float*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK1-NEXT: [[VAR1_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK1-NEXT: [[T_VAR1_ADDR:%.*]] = alloca float*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR2:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[VAR3:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[VAR14:%.*]] = alloca [[STRUCT_S]], align 4 -// CHECK1-NEXT: [[T_VAR15:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x i8*], align 8 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 4 -// CHECK1-NEXT: [[REF_TMP16:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[REF_TMP11:%.*]] = alloca [[STRUCT_S]], align 4 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca float, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store float* [[T_VAR]], float** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S* [[VAR1]], %struct.S** [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: store float* [[T_VAR1]], float** [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load float*, float** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float*, float** [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load float*, float** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load %struct.S*, %struct.S** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.S*, %struct.S** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP11]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: store float 0.000000e+00, float* [[T_VAR2]], align 4 -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: store float 0x47EFFFFFE0000000, float* [[T_VAR15]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = icmp slt i32 [[TMP8]], 0 -// CHECK1-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 0 -// CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: store float 0.000000e+00, float* [[T_VAR]], align 4 +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: store float 0x47EFFFFFE0000000, float* [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = icmp slt i32 [[TMP15]], 0 +// CHECK1-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP15]], i32 0 +// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP14]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP21]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP15:%.*]] = load float, float* [[T_VAR2]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = fptosi float [[TMP15]] to i32 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP22:%.*]] = load float, float* [[T_VAR]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = fptosi float [[TMP22]] to i32 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP10]], i64 0, i64 0 // CHECK1-NEXT: store i32 [[CONV]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP5]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast %struct.S* [[ARRAYIDX6]] to i8* -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast %struct.S* [[VAR3]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP16]], i8* align 4 [[TMP17]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP18:%.*]] = load float, float* [[T_VAR15]], align 4 -// CHECK1-NEXT: [[CONV7:%.*]] = fptosi float [[TMP18]] to i32 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP4]], i64 0, i64 1 -// CHECK1-NEXT: store i32 [[CONV7]], i32* [[ARRAYIDX8]], align 4 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP5]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[ARRAYIDX9]] to i8* -// CHECK1-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[VAR14]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i64 4, i1 false) +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP12]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast %struct.S* [[ARRAYIDX1]] to i8* +// CHECK1-NEXT: [[TMP24:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP23]], i8* align 4 [[TMP24]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP25:%.*]] = load float, float* [[T_VAR1]], align 4 +// CHECK1-NEXT: [[CONV2:%.*]] = fptosi float [[TMP25]] to i32 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP10]], i64 0, i64 1 +// CHECK1-NEXT: store i32 [[CONV2]], i32* [[ARRAYIDX3]], align 4 +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP12]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* +// CHECK1-NEXT: [[TMP27:%.*]] = bitcast %struct.S* [[VAR1]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 4, i1 false) // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK1-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast float* [[T_VAR2]] to i8* -// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP25:%.*]] = bitcast %struct.S* [[VAR3]] to i8* -// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP27:%.*]] = bitcast %struct.S* [[VAR14]] to i8* -// CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP29:%.*]] = bitcast float* [[T_VAR15]] to i8* -// CHECK1-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = bitcast [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP31:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 4, i64 32, i8* [[TMP30]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP31]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]]) +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP30:%.*]] = bitcast float* [[T_VAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[VAR1]] to i8* +// CHECK1-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 +// CHECK1-NEXT: [[TMP36:%.*]] = bitcast float* [[T_VAR1]] to i8* +// CHECK1-NEXT: store i8* [[TMP36]], i8** [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = bitcast [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP38:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 4, i64 32, i8* [[TMP37]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP38]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP32:%.*]] = load float, float* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = load float, float* [[T_VAR2]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP32]], [[TMP33]] -// CHECK1-NEXT: store float [[ADD]], float* [[TMP0]], align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP1]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[TMP1]] to i8* -// CHECK1-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[CALL]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i64 4, i1 false) -// CHECK1-NEXT: [[CALL10:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[CALL10]], 0.000000e+00 +// CHECK1-NEXT: [[TMP39:%.*]] = load float, float* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load float, float* [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP39]], [[TMP40]] +// CHECK1-NEXT: store float [[ADD]], float* [[TMP2]], align 4 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP4]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: [[TMP41:%.*]] = bitcast %struct.S* [[TMP4]] to i8* +// CHECK1-NEXT: [[TMP42:%.*]] = bitcast %struct.S* [[CALL]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP41]], i8* align 4 [[TMP42]], i64 4, i1 false) +// CHECK1-NEXT: [[CALL5:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP6]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[CALL5]], 0.000000e+00 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] // CHECK1: land.rhs: -// CHECK1-NEXT: [[CALL11:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: [[TOBOOL12:%.*]] = fcmp une float [[CALL11]], 0.000000e+00 +// CHECK1-NEXT: [[CALL6:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL7:%.*]] = fcmp une float [[CALL6]], 0.000000e+00 // CHECK1-NEXT: br label [[LAND_END]] // CHECK1: land.end: -// CHECK1-NEXT: [[TMP36:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL12]], [[LAND_RHS]] ] -// CHECK1-NEXT: [[CONV13:%.*]] = uitofp i1 [[TMP36]] to float -// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], float noundef [[CONV13]]) -// CHECK1-NEXT: [[TMP37:%.*]] = bitcast %struct.S* [[TMP2]] to i8* -// CHECK1-NEXT: [[TMP38:%.*]] = bitcast %struct.S* [[REF_TMP]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP37]], i8* align 4 [[TMP38]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP43:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL7]], [[LAND_RHS]] ] +// CHECK1-NEXT: [[CONV8:%.*]] = uitofp i1 [[TMP43]] to float +// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], float noundef [[CONV8]]) +// CHECK1-NEXT: [[TMP44:%.*]] = bitcast %struct.S* [[TMP6]] to i8* +// CHECK1-NEXT: [[TMP45:%.*]] = bitcast %struct.S* [[REF_TMP]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP44]], i8* align 4 [[TMP45]], i64 4, i1 false) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP39:%.*]] = load float, float* [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP40:%.*]] = load float, float* [[T_VAR15]], align 4 -// CHECK1-NEXT: [[CMP14:%.*]] = fcmp olt float [[TMP39]], [[TMP40]] -// CHECK1-NEXT: br i1 [[CMP14]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP46:%.*]] = load float, float* [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = load float, float* [[T_VAR1]], align 4 +// CHECK1-NEXT: [[CMP9:%.*]] = fcmp olt float [[TMP46]], [[TMP47]] +// CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP41:%.*]] = load float, float* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = load float, float* [[TMP8]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP42:%.*]] = load float, float* [[T_VAR15]], align 4 +// CHECK1-NEXT: [[TMP49:%.*]] = load float, float* [[T_VAR1]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi float [ [[TMP41]], [[COND_TRUE]] ], [ [[TMP42]], [[COND_FALSE]] ] -// CHECK1-NEXT: store float [[COND]], float* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[COND:%.*]] = phi float [ [[TMP48]], [[COND_TRUE]] ], [ [[TMP49]], [[COND_FALSE]] ] +// CHECK1-NEXT: store float [[COND]], float* [[TMP8]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP14]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP43:%.*]] = load float, float* [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP44:%.*]] = atomicrmw fadd float* [[TMP0]], float [[TMP43]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL15:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP1]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: [[TMP45:%.*]] = bitcast %struct.S* [[TMP1]] to i8* -// CHECK1-NEXT: [[TMP46:%.*]] = bitcast %struct.S* [[CALL15]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP45]], i8* align 4 [[TMP46]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL17:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]]) -// CHECK1-NEXT: [[TOBOOL18:%.*]] = fcmp une float [[CALL17]], 0.000000e+00 -// CHECK1-NEXT: br i1 [[TOBOOL18]], label [[LAND_RHS19:%.*]], label [[LAND_END22:%.*]] -// CHECK1: land.rhs19: -// CHECK1-NEXT: [[CALL20:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: [[TOBOOL21:%.*]] = fcmp une float [[CALL20]], 0.000000e+00 -// CHECK1-NEXT: br label [[LAND_END22]] -// CHECK1: land.end22: -// CHECK1-NEXT: [[TMP47:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL21]], [[LAND_RHS19]] ] -// CHECK1-NEXT: [[CONV23:%.*]] = uitofp i1 [[TMP47]] to float -// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP16]], float noundef [[CONV23]]) -// CHECK1-NEXT: [[TMP48:%.*]] = bitcast %struct.S* [[TMP2]] to i8* -// CHECK1-NEXT: [[TMP49:%.*]] = bitcast %struct.S* [[REF_TMP16]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP48]], i8* align 4 [[TMP49]], i64 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR4]] -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[TMP50:%.*]] = load float, float* [[T_VAR15]], align 4 -// CHECK1-NEXT: [[TMP51:%.*]] = bitcast float* [[TMP3]] to i32* -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP51]] monotonic, align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = load float, float* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP51:%.*]] = atomicrmw fadd float* [[TMP2]], float [[TMP50]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL10:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEanERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP4]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: [[TMP52:%.*]] = bitcast %struct.S* [[TMP4]] to i8* +// CHECK1-NEXT: [[TMP53:%.*]] = bitcast %struct.S* [[CALL10]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP52]], i8* align 4 [[TMP53]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL12:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP6]]) +// CHECK1-NEXT: [[TOBOOL13:%.*]] = fcmp une float [[CALL12]], 0.000000e+00 +// CHECK1-NEXT: br i1 [[TOBOOL13]], label [[LAND_RHS14:%.*]], label [[LAND_END17:%.*]] +// CHECK1: land.rhs14: +// CHECK1-NEXT: [[CALL15:%.*]] = call noundef float @_ZN1SIfEcvfEv(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL16:%.*]] = fcmp une float [[CALL15]], 0.000000e+00 +// CHECK1-NEXT: br label [[LAND_END17]] +// CHECK1: land.end17: +// CHECK1-NEXT: [[TMP54:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL16]], [[LAND_RHS14]] ] +// CHECK1-NEXT: [[CONV18:%.*]] = uitofp i1 [[TMP54]] to float +// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP11]], float noundef [[CONV18]]) +// CHECK1-NEXT: [[TMP55:%.*]] = bitcast %struct.S* [[TMP6]] to i8* +// CHECK1-NEXT: [[TMP56:%.*]] = bitcast %struct.S* [[REF_TMP11]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP55]], i8* align 4 [[TMP56]], i64 4, i1 false) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[REF_TMP11]]) #[[ATTR4]] +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP57:%.*]] = load float, float* [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP58:%.*]] = bitcast float* [[TMP8]] to i32* +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, i32* [[TMP58]] monotonic, align 4 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP52:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END22]] ], [ [[TMP62:%.*]], [[COND_END27:%.*]] ] -// CHECK1-NEXT: [[TMP53:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32* -// CHECK1-NEXT: [[TMP54:%.*]] = bitcast i32 [[TMP52]] to float -// CHECK1-NEXT: store float [[TMP54]], float* [[TMP]], align 4 -// CHECK1-NEXT: [[TMP55:%.*]] = load float, float* [[TMP]], align 4 -// CHECK1-NEXT: [[TMP56:%.*]] = load float, float* [[T_VAR15]], align 4 -// CHECK1-NEXT: [[CMP24:%.*]] = fcmp olt float [[TMP55]], [[TMP56]] -// CHECK1-NEXT: br i1 [[CMP24]], label [[COND_TRUE25:%.*]], label [[COND_FALSE26:%.*]] -// CHECK1: cond.true25: -// CHECK1-NEXT: [[TMP57:%.*]] = load float, float* [[TMP]], align 4 -// CHECK1-NEXT: br label [[COND_END27]] -// CHECK1: cond.false26: -// CHECK1-NEXT: [[TMP58:%.*]] = load float, float* [[T_VAR15]], align 4 -// CHECK1-NEXT: br label [[COND_END27]] -// CHECK1: cond.end27: -// CHECK1-NEXT: [[COND28:%.*]] = phi float [ [[TMP57]], [[COND_TRUE25]] ], [ [[TMP58]], [[COND_FALSE26]] ] -// CHECK1-NEXT: store float [[COND28]], float* [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[TMP59:%.*]] = load i32, i32* [[TMP53]], align 4 -// CHECK1-NEXT: [[TMP60:%.*]] = bitcast float* [[TMP3]] to i32* -// CHECK1-NEXT: [[TMP61:%.*]] = cmpxchg i32* [[TMP60]], i32 [[TMP52]], i32 [[TMP59]] monotonic monotonic, align 4 -// CHECK1-NEXT: [[TMP62]] = extractvalue { i32, i1 } [[TMP61]], 0 -// CHECK1-NEXT: [[TMP63:%.*]] = extractvalue { i32, i1 } [[TMP61]], 1 -// CHECK1-NEXT: br i1 [[TMP63]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP59:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END17]] ], [ [[TMP69:%.*]], [[COND_END22:%.*]] ] +// CHECK1-NEXT: [[TMP60:%.*]] = bitcast float* [[ATOMIC_TEMP]] to i32* +// CHECK1-NEXT: [[TMP61:%.*]] = bitcast i32 [[TMP59]] to float +// CHECK1-NEXT: store float [[TMP61]], float* [[TMP]], align 4 +// CHECK1-NEXT: [[TMP62:%.*]] = load float, float* [[TMP]], align 4 +// CHECK1-NEXT: [[TMP63:%.*]] = load float, float* [[T_VAR1]], align 4 +// CHECK1-NEXT: [[CMP19:%.*]] = fcmp olt float [[TMP62]], [[TMP63]] +// CHECK1-NEXT: br i1 [[CMP19]], label [[COND_TRUE20:%.*]], label [[COND_FALSE21:%.*]] +// CHECK1: cond.true20: +// CHECK1-NEXT: [[TMP64:%.*]] = load float, float* [[TMP]], align 4 +// CHECK1-NEXT: br label [[COND_END22]] +// CHECK1: cond.false21: +// CHECK1-NEXT: [[TMP65:%.*]] = load float, float* [[T_VAR1]], align 4 +// CHECK1-NEXT: br label [[COND_END22]] +// CHECK1: cond.end22: +// CHECK1-NEXT: [[COND23:%.*]] = phi float [ [[TMP64]], [[COND_TRUE20]] ], [ [[TMP65]], [[COND_FALSE21]] ] +// CHECK1-NEXT: store float [[COND23]], float* [[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[TMP66:%.*]] = load i32, i32* [[TMP60]], align 4 +// CHECK1-NEXT: [[TMP67:%.*]] = bitcast float* [[TMP8]] to i32* +// CHECK1-NEXT: [[TMP68:%.*]] = cmpxchg i32* [[TMP67]], i32 [[TMP59]], i32 [[TMP66]] monotonic monotonic, align 4 +// CHECK1-NEXT: [[TMP69]] = extractvalue { i32, i1 } [[TMP68]], 0 +// CHECK1-NEXT: [[TMP70:%.*]] = extractvalue { i32, i1 } [[TMP68]], 1 +// CHECK1-NEXT: br i1 [[TMP70]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP14]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR14]]) #[[ATTR4]] -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR3]]) #[[ATTR4]] -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP7]]) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR4]] +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP14]]) // CHECK1-NEXT: ret void // // @@ -578,6 +588,7 @@ // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 // CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* @@ -588,23 +599,35 @@ // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.S.0*, %struct.S.0*, i32*, [2 x i32]*, [2 x %struct.S.0]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[T_VAR]], %struct.S.0* [[VAR]], %struct.S.0* [[VAR1]], i32* [[T_VAR1]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.S.0* [[VAR1]], %struct.S.0** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i32* [[T_VAR1]], i32** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP6]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR4]] // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP2]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP8]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev @@ -671,180 +694,177 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR1:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[VAR1_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[T_VAR1_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VAR3:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[VAR14:%.*]] = alloca [[STRUCT_S_0]], align 4 -// CHECK1-NEXT: [[T_VAR15:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x i8*], align 8 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_0]], align 4 -// CHECK1-NEXT: [[REF_TMP13:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[REF_TMP8:%.*]] = alloca [[STRUCT_S_0]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[VAR1]], %struct.S.0** [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR1]], i32** [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP11]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[T_VAR2]], align 4 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: store i32 2147483647, i32* [[T_VAR15]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = icmp slt i32 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 1 -// CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: store i32 2147483647, i32* [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = icmp slt i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP15]], i32 1 +// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP14]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP21]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] -// CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE6:%.*]] +// CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP10]], i64 0, i64 0 +// CHECK1-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK1: .omp.sections.case6: -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP5]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast %struct.S.0* [[ARRAYIDX7]] to i8* -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast %struct.S.0* [[VAR3]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP16]], i8* align 4 [[TMP17]], i64 4, i1 false) +// CHECK1: .omp.sections.case1: +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP12]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[ARRAYIDX2]] to i8* +// CHECK1-NEXT: [[TMP24:%.*]] = bitcast %struct.S.0* [[VAR]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP23]], i8* align 4 [[TMP24]], i64 4, i1 false) // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK1-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i32* [[T_VAR2]] to i8* -// CHECK1-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[VAR3]] to i8* -// CHECK1-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP24:%.*]] = bitcast %struct.S.0* [[VAR14]] to i8* -// CHECK1-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i32* [[T_VAR15]] to i8* -// CHECK1-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = bitcast [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32 4, i64 32, i8* [[TMP27]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP28]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]]) +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP27:%.*]] = bitcast i32* [[T_VAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP29:%.*]] = bitcast %struct.S.0* [[VAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP31:%.*]] = bitcast %struct.S.0* [[VAR1]] to i8* +// CHECK1-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast i32* [[T_VAR1]] to i8* +// CHECK1-NEXT: store i8* [[TMP33]], i8** [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast [4 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP35:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP14]], i32 4, i64 32, i8* [[TMP34]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP35]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEanERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP1]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: [[TMP31:%.*]] = bitcast %struct.S.0* [[TMP1]] to i8* -// CHECK1-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[CALL]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP31]], i8* align 4 [[TMP32]], i64 4, i1 false) -// CHECK1-NEXT: [[CALL8:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP2]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[CALL8]], 0 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEanERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP4]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: [[TMP38:%.*]] = bitcast %struct.S.0* [[TMP4]] to i8* +// CHECK1-NEXT: [[TMP39:%.*]] = bitcast %struct.S.0* [[CALL]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i64 4, i1 false) +// CHECK1-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP6]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[CALL3]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] // CHECK1: land.rhs: -// CHECK1-NEXT: [[CALL9:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: [[TOBOOL10:%.*]] = icmp ne i32 [[CALL9]], 0 +// CHECK1-NEXT: [[CALL4:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL5:%.*]] = icmp ne i32 [[CALL4]], 0 // CHECK1-NEXT: br label [[LAND_END]] // CHECK1: land.end: -// CHECK1-NEXT: [[TMP33:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL10]], [[LAND_RHS]] ] -// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TMP33]] to i32 +// CHECK1-NEXT: [[TMP40:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL5]], [[LAND_RHS]] ] +// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TMP40]] to i32 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], i32 noundef [[CONV]]) -// CHECK1-NEXT: [[TMP34:%.*]] = bitcast %struct.S.0* [[TMP2]] to i8* -// CHECK1-NEXT: [[TMP35:%.*]] = bitcast %struct.S.0* [[REF_TMP]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP41:%.*]] = bitcast %struct.S.0* [[TMP6]] to i8* +// CHECK1-NEXT: [[TMP42:%.*]] = bitcast %struct.S.0* [[REF_TMP]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP41]], i8* align 4 [[TMP42]], i64 4, i1 false) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[T_VAR15]], align 4 -// CHECK1-NEXT: [[CMP11:%.*]] = icmp slt i32 [[TMP36]], [[TMP37]] -// CHECK1-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[T_VAR1]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP43]], [[TMP44]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[T_VAR15]], align 4 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[T_VAR1]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP38]], [[COND_TRUE]] ], [ [[TMP39]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP45]], [[COND_TRUE]] ], [ [[TMP46]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], i32* [[TMP8]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP14]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP41:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP40]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL12:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEanERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP1]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: [[TMP42:%.*]] = bitcast %struct.S.0* [[TMP1]] to i8* -// CHECK1-NEXT: [[TMP43:%.*]] = bitcast %struct.S.0* [[CALL12]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP42]], i8* align 4 [[TMP43]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL14:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP2]]) -// CHECK1-NEXT: [[TOBOOL15:%.*]] = icmp ne i32 [[CALL14]], 0 -// CHECK1-NEXT: br i1 [[TOBOOL15]], label [[LAND_RHS16:%.*]], label [[LAND_END19:%.*]] -// CHECK1: land.rhs16: -// CHECK1-NEXT: [[CALL17:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: [[TOBOOL18:%.*]] = icmp ne i32 [[CALL17]], 0 -// CHECK1-NEXT: br label [[LAND_END19]] -// CHECK1: land.end19: -// CHECK1-NEXT: [[TMP44:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL18]], [[LAND_RHS16]] ] -// CHECK1-NEXT: [[CONV20:%.*]] = zext i1 [[TMP44]] to i32 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[REF_TMP13]], i32 noundef [[CONV20]]) -// CHECK1-NEXT: [[TMP45:%.*]] = bitcast %struct.S.0* [[TMP2]] to i8* -// CHECK1-NEXT: [[TMP46:%.*]] = bitcast %struct.S.0* [[REF_TMP13]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP45]], i8* align 4 [[TMP46]], i64 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[REF_TMP13]]) #[[ATTR4]] -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[T_VAR15]], align 4 -// CHECK1-NEXT: [[TMP48:%.*]] = atomicrmw min i32* [[TMP3]], i32 [[TMP47]] monotonic, align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP47]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL7:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEanERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP4]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: [[TMP49:%.*]] = bitcast %struct.S.0* [[TMP4]] to i8* +// CHECK1-NEXT: [[TMP50:%.*]] = bitcast %struct.S.0* [[CALL7]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP49]], i8* align 4 [[TMP50]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL9:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP6]]) +// CHECK1-NEXT: [[TOBOOL10:%.*]] = icmp ne i32 [[CALL9]], 0 +// CHECK1-NEXT: br i1 [[TOBOOL10]], label [[LAND_RHS11:%.*]], label [[LAND_END14:%.*]] +// CHECK1: land.rhs11: +// CHECK1-NEXT: [[CALL12:%.*]] = call noundef i32 @_ZN1SIiEcviEv(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL13:%.*]] = icmp ne i32 [[CALL12]], 0 +// CHECK1-NEXT: br label [[LAND_END14]] +// CHECK1: land.end14: +// CHECK1-NEXT: [[TMP51:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL13]], [[LAND_RHS11]] ] +// CHECK1-NEXT: [[CONV15:%.*]] = zext i1 [[TMP51]] to i32 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[REF_TMP8]], i32 noundef [[CONV15]]) +// CHECK1-NEXT: [[TMP52:%.*]] = bitcast %struct.S.0* [[TMP6]] to i8* +// CHECK1-NEXT: [[TMP53:%.*]] = bitcast %struct.S.0* [[REF_TMP8]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP52]], i8* align 4 [[TMP53]], i64 4, i1 false) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[REF_TMP8]]) #[[ATTR4]] +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP54:%.*]] = load i32, i32* [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP55:%.*]] = atomicrmw min i32* [[TMP8]], i32 [[TMP54]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR14]]) #[[ATTR4]] -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR3]]) #[[ATTR4]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR4]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -1004,10 +1024,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK3-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -1018,29 +1039,31 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 // CHECK3-NEXT: store double 0.000000e+00, double* [[G]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1 -// CHECK3-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 1 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK3-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK3-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK3-NEXT: ] @@ -1048,42 +1071,42 @@ // CHECK3-NEXT: store double 1.000000e+00, double* [[G]], align 8 // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK3: .omp.sections.case1: -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double* [[G]], double** [[TMP9]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G]], double** [[TMP10]], align 8 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK3: .omp.sections.exit: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK3-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP12:%.*]] = bitcast double* [[G]] to i8* -// CHECK3-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 1, i64 8, i8* [[TMP13]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP14]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK3-NEXT: [[TMP13:%.*]] = bitcast double* [[G]] to i8* +// CHECK3-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP14]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP15:%.*]] = load double, double* @g, align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = load double, double* [[G]], align 8 -// CHECK3-NEXT: [[ADD:%.*]] = fadd double [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP16:%.*]] = load double, double* @g, align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = load double, double* [[G]], align 8 +// CHECK3-NEXT: [[ADD:%.*]] = fadd double [[TMP16]], [[TMP17]] // CHECK3-NEXT: store double [[ADD]], double* @g, align 8 -// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP17:%.*]] = load double, double* [[G]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = atomicrmw fadd double* @g, double [[TMP17]] monotonic, align 8 -// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP18:%.*]] = load double, double* [[G]], align 8 +// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw fadd double* @g, double [[TMP18]] monotonic, align 8 +// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP2]]) // CHECK3-NEXT: ret void // // @@ -1127,18 +1150,20 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>*, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* // CHECK4-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>** [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK4-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -1149,29 +1174,31 @@ // CHECK4-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK4-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 // CHECK4-NEXT: store double 0.000000e+00, double* [[G]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1 -// CHECK4-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1 -// CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1 +// CHECK4-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 1 +// CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK4-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK4-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK4-NEXT: ] @@ -1190,48 +1217,48 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double }>* [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp.1 to %struct.__block_descriptor*), %struct.__block_descriptor** [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double }>* [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP9:%.*]] = load volatile double, double* [[G]], align 8 -// CHECK4-NEXT: store volatile double [[TMP9]], double* [[BLOCK_CAPTURED]], align 8 -// CHECK4-NEXT: [[TMP10:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double }>* [[BLOCK]] to void ()* -// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP10]] to %struct.__block_literal_generic* -// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP12:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* -// CHECK4-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP11]], align 8 -// CHECK4-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to void (i8*)* -// CHECK4-NEXT: call void [[TMP14]](i8* noundef [[TMP12]]) +// CHECK4-NEXT: [[TMP10:%.*]] = load volatile double, double* [[G]], align 8 +// CHECK4-NEXT: store volatile double [[TMP10]], double* [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double }>* [[BLOCK]] to void ()* +// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP11]] to %struct.__block_literal_generic* +// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP13:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* +// CHECK4-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP12]], align 8 +// CHECK4-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to void (i8*)* +// CHECK4-NEXT: call void [[TMP15]](i8* noundef [[TMP13]]) // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK4: .omp.sections.exit: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK4-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK4-NEXT: [[TMP17:%.*]] = bitcast double* [[G]] to i8* -// CHECK4-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8 -// CHECK4-NEXT: [[TMP18:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK4-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 1, i64 8, i8* [[TMP18]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK4-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK4-NEXT: [[TMP18:%.*]] = bitcast double* [[G]] to i8* +// CHECK4-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 8 +// CHECK4-NEXT: [[TMP19:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK4-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP19]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK4-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK4-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK4-NEXT: ] // CHECK4: .omp.reduction.case1: -// CHECK4-NEXT: [[TMP20:%.*]] = load double, double* @g, align 8 -// CHECK4-NEXT: [[TMP21:%.*]] = load double, double* [[G]], align 8 -// CHECK4-NEXT: [[ADD:%.*]] = fadd double [[TMP20]], [[TMP21]] +// CHECK4-NEXT: [[TMP21:%.*]] = load double, double* @g, align 8 +// CHECK4-NEXT: [[TMP22:%.*]] = load double, double* [[G]], align 8 +// CHECK4-NEXT: [[ADD:%.*]] = fadd double [[TMP21]], [[TMP22]] // CHECK4-NEXT: store double [[ADD]], double* @g, align 8 -// CHECK4-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: .omp.reduction.case2: -// CHECK4-NEXT: [[TMP22:%.*]] = load double, double* [[G]], align 8 -// CHECK4-NEXT: [[TMP23:%.*]] = atomicrmw fadd double* @g, double [[TMP22]] monotonic, align 8 -// CHECK4-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: [[TMP23:%.*]] = load double, double* [[G]], align 8 +// CHECK4-NEXT: [[TMP24:%.*]] = atomicrmw fadd double* @g, double [[TMP23]] monotonic, align 8 +// CHECK4-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: .omp.reduction.default: -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP2]]) // CHECK4-NEXT: ret void // // diff --git a/clang/test/OpenMP/sections_reduction_task_codegen.cpp b/clang/test/OpenMP/sections_reduction_task_codegen.cpp --- a/clang/test/OpenMP/sections_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/sections_reduction_task_codegen.cpp @@ -41,283 +41,289 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[ARGC_ADDR]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i8*** [[ARGV_ADDR]], i8**** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i8*** noundef nonnull align 8 dereferenceable(8) [[ARGV:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca i8***, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x i8*], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP24:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP23:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i8*** [[ARGV]], i8**** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8***, i8**** [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8***, i8**** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[TMP3]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = sext i32 [[TMP4]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP5]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i8**, i8*** [[TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8*, i8** [[TMP6]], i64 9 -// CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint i8* [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint i8* [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = sub i64 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: [[TMP11:%.*]] = sdiv exact i64 [[TMP10]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP12:%.*]] = add nuw i64 [[TMP11]], 1 -// CHECK1-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP14:%.*]] = call i8* @llvm.stacksave() -// CHECK1-NEXT: store i8* [[TMP14]], i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP12]], align 16 -// CHECK1-NEXT: store i64 [[TMP12]], i64* [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP12]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[VLA]], [[TMP15]] +// CHECK1-NEXT: store i32 0, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8**, i8*** [[TMP4]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP5]], i64 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 0 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP8]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i8**, i8*** [[TMP4]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8*, i8** [[TMP9]], i64 9 +// CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP10]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint i8* [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = ptrtoint i8* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP13:%.*]] = sub i64 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = sdiv exact i64 [[TMP13]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP15:%.*]] = add nuw i64 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP17:%.*]] = call i8* @llvm.stacksave() +// CHECK1-NEXT: store i8* [[TMP17]], i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP15]], align 16 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP15]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[VLA]], [[TMP18]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP18]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint i8* [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = ptrtoint i8* [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP20:%.*]] = sub i64 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: [[TMP21:%.*]] = sdiv exact i64 [[TMP20]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP21]] -// CHECK1-NEXT: store i8** [[_TMP5]], i8*** [[TMP]], align 8 -// CHECK1-NEXT: store i8* [[TMP22]], i8** [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i8**, i8*** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i8*, i8** [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint i8* [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP22:%.*]] = ptrtoint i8* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP23:%.*]] = sub i64 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP24:%.*]] = sdiv exact i64 [[TMP23]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP24]] +// CHECK1-NEXT: store i8** [[_TMP4]], i8*** [[TMP]], align 8 +// CHECK1-NEXT: store i8* [[TMP25]], i8** [[_TMP4]], align 8 // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP24:%.*]] = bitcast i32* [[ARGC1]] to i8* -// CHECK1-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i32* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, i64* [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init. to i8*), i8** [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store i8* null, i8** [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb. to i8*), i8** [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to i8* -// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP32]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP34:%.*]] = load i8**, i8*** [[TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8*, i8** [[TMP34]], i64 0 -// CHECK1-NEXT: [[TMP35:%.*]] = load i8*, i8** [[ARRAYIDX7]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, i8* [[TMP35]], i64 0 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = sext i32 [[TMP36]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP37]] -// CHECK1-NEXT: [[TMP38:%.*]] = load i8**, i8*** [[TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i8*, i8** [[TMP38]], i64 9 -// CHECK1-NEXT: [[TMP39:%.*]] = load i8*, i8** [[ARRAYIDX10]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, i8* [[TMP39]], i64 [[LB_ADD_LEN9]] -// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 -// CHECK1-NEXT: store i8* [[ARRAYIDX8]], i8** [[TMP40]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint i8* [[ARRAYIDX11]] to i64 -// CHECK1-NEXT: [[TMP42:%.*]] = ptrtoint i8* [[ARRAYIDX8]] to i64 -// CHECK1-NEXT: [[TMP43:%.*]] = sub i64 [[TMP41]], [[TMP42]] -// CHECK1-NEXT: [[TMP44:%.*]] = sdiv exact i64 [[TMP43]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP45:%.*]] = add nuw i64 [[TMP44]], 1 -// CHECK1-NEXT: [[TMP46:%.*]] = mul nuw i64 [[TMP45]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP46]], i64* [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init..1 to i8*), i8** [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 -// CHECK1-NEXT: store i8* null, i8** [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb..2 to i8*), i8** [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 -// CHECK1-NEXT: store i32 1, i32* [[TMP51]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP53:%.*]] = load i32, i32* [[TMP52]], align 4 -// CHECK1-NEXT: [[TMP54:%.*]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]] to i8* -// CHECK1-NEXT: [[TMP55:%.*]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @[[GLOB1]], i32 [[TMP53]], i32 1, i32 2, i8* [[TMP54]]) -// CHECK1-NEXT: store i8* [[TMP55]], i8** [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = load i32, i32* [[TMP56]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP57]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP58:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP59:%.*]] = icmp slt i32 [[TMP58]], 0 -// CHECK1-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[TMP58]], i32 0 -// CHECK1-NEXT: store i32 [[TMP60]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP61:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP61]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP27:%.*]] = bitcast i32* [[ARGC]] to i8* +// CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP2]] to i8* +// CHECK1-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, i64* [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init. to i8*), i8** [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store i8* null, i8** [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb. to i8*), i8** [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP35:%.*]] = bitcast i32* [[TMP34]] to i8* +// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP35]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP37:%.*]] = load i8**, i8*** [[TMP4]], align 8 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP37]], i64 0 +// CHECK1-NEXT: [[TMP38:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP38]], i64 0 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = sext i32 [[TMP39]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN8:%.*]] = add nsw i64 -1, [[TMP40]] +// CHECK1-NEXT: [[TMP41:%.*]] = load i8**, i8*** [[TMP4]], align 8 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8*, i8** [[TMP41]], i64 9 +// CHECK1-NEXT: [[TMP42:%.*]] = load i8*, i8** [[ARRAYIDX9]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i8, i8* [[TMP42]], i64 [[LB_ADD_LEN8]] +// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 1 +// CHECK1-NEXT: store i8* [[ARRAYIDX7]], i8** [[TMP43]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint i8* [[ARRAYIDX10]] to i64 +// CHECK1-NEXT: [[TMP45:%.*]] = ptrtoint i8* [[ARRAYIDX7]] to i64 +// CHECK1-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] +// CHECK1-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP48:%.*]] = add nuw i64 [[TMP47]], 1 +// CHECK1-NEXT: [[TMP49:%.*]] = mul nuw i64 [[TMP48]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP49]], i64* [[TMP50]], align 8 +// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 3 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init..1 to i8*), i8** [[TMP51]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 4 +// CHECK1-NEXT: store i8* null, i8** [[TMP52]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 5 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb..2 to i8*), i8** [[TMP53]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 6 +// CHECK1-NEXT: store i32 1, i32* [[TMP54]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4 +// CHECK1-NEXT: [[TMP57:%.*]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]] to i8* +// CHECK1-NEXT: [[TMP58:%.*]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @[[GLOB1]], i32 [[TMP56]], i32 1, i32 2, i8* [[TMP57]]) +// CHECK1-NEXT: store i8* [[TMP58]], i8** [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load i32, i32* [[TMP59]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP60]], i32 34, i32* [[DOTOMP_SECTIONS_IL_]], i32* [[DOTOMP_SECTIONS_LB_]], i32* [[DOTOMP_SECTIONS_UB_]], i32* [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP61:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP62:%.*]] = icmp slt i32 [[TMP61]], 0 +// CHECK1-NEXT: [[TMP63:%.*]] = select i1 [[TMP62]], i32 [[TMP61]], i32 0 +// CHECK1-NEXT: store i32 [[TMP63]], i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP64:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP64]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP62:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP63:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP62]], [[TMP63]] +// CHECK1-NEXT: [[TMP65:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP66:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP65]], [[TMP66]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP64:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP64]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP67:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP67]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store i8** [[DOTTASK_RED_]], i8*** [[TMP65]], align 8 -// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[ARGC1]], i32** [[TMP66]], align 8 -// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP68:%.*]] = load i8**, i8*** [[TMP]], align 8 -// CHECK1-NEXT: store i8** [[TMP68]], i8*** [[TMP67]], align 8 -// CHECK1-NEXT: [[TMP69:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP70:%.*]] = load i32, i32* [[TMP69]], align 4 -// CHECK1-NEXT: [[TMP71:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP70]], i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP72:%.*]] = bitcast i8* [[TMP71]] to %struct.kmp_task_t_with_privates* -// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP72]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP73]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP75:%.*]] = load i8*, i8** [[TMP74]], align 8 -// CHECK1-NEXT: [[TMP76:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP75]], i8* align 8 [[TMP76]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP72]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP77]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP79:%.*]] = load i8*, i8** [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: store i8* [[TMP79]], i8** [[TMP78]], align 8 -// CHECK1-NEXT: [[TMP80:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP81:%.*]] = load i32, i32* [[TMP80]], align 4 -// CHECK1-NEXT: [[TMP82:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP81]], i8* [[TMP71]]) +// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store i8** [[DOTTASK_RED_]], i8*** [[TMP68]], align 8 +// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[ARGC]], i32** [[TMP69]], align 8 +// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP71:%.*]] = load i8**, i8*** [[TMP]], align 8 +// CHECK1-NEXT: store i8** [[TMP71]], i8*** [[TMP70]], align 8 +// CHECK1-NEXT: [[TMP72:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP73:%.*]] = load i32, i32* [[TMP72]], align 4 +// CHECK1-NEXT: [[TMP74:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP73]], i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP75:%.*]] = bitcast i8* [[TMP74]] to %struct.kmp_task_t_with_privates* +// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP75]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP76]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP78:%.*]] = load i8*, i8** [[TMP77]], align 8 +// CHECK1-NEXT: [[TMP79:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP78]], i8* align 8 [[TMP79]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP75]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP80]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP82:%.*]] = load i8*, i8** [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: store i8* [[TMP82]], i8** [[TMP81]], align 8 +// CHECK1-NEXT: [[TMP83:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP84:%.*]] = load i32, i32* [[TMP83]], align 4 +// CHECK1-NEXT: [[TMP85:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP84]], i8* [[TMP74]]) // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP83:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP83]], 1 +// CHECK1-NEXT: [[TMP86:%.*]] = load i32, i32* [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP86]], 1 // CHECK1-NEXT: store i32 [[INC]], i32* [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: [[TMP84:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP85:%.*]] = load i32, i32* [[TMP84]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP85]]) -// CHECK1-NEXT: [[TMP86:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP87:%.*]] = load i32, i32* [[TMP86]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP87]], i32 1) -// CHECK1-NEXT: [[TMP88:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP89:%.*]] = bitcast i32* [[ARGC1]] to i8* -// CHECK1-NEXT: store i8* [[TMP89]], i8** [[TMP88]], align 8 -// CHECK1-NEXT: [[TMP90:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP90]], align 8 -// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP92:%.*]] = inttoptr i64 [[TMP12]] to i8* +// CHECK1-NEXT: [[TMP87:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP88:%.*]] = load i32, i32* [[TMP87]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP88]]) +// CHECK1-NEXT: [[TMP89:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP90:%.*]] = load i32, i32* [[TMP89]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP90]], i32 1) +// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP92:%.*]] = bitcast i32* [[ARGC]] to i8* // CHECK1-NEXT: store i8* [[TMP92]], i8** [[TMP91]], align 8 -// CHECK1-NEXT: [[TMP93:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP94:%.*]] = load i32, i32* [[TMP93]], align 4 -// CHECK1-NEXT: [[TMP95:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP96:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP94]], i32 2, i64 24, i8* [[TMP95]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP96]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP93:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP93]], align 8 +// CHECK1-NEXT: [[TMP94:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP95:%.*]] = inttoptr i64 [[TMP15]] to i8* +// CHECK1-NEXT: store i8* [[TMP95]], i8** [[TMP94]], align 8 +// CHECK1-NEXT: [[TMP96:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP97:%.*]] = load i32, i32* [[TMP96]], align 4 +// CHECK1-NEXT: [[TMP98:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP99:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP97]], i32 2, i64 24, i8* [[TMP98]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP99]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP97:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP98:%.*]] = load i32, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP97]], [[TMP98]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP99:%.*]] = getelementptr i8, i8* [[ARRAYIDX2]], i64 [[TMP12]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[ARRAYIDX2]], [[TMP99]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP100:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP101:%.*]] = load i32, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP100]], [[TMP101]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP102:%.*]] = getelementptr i8, i8* [[ARRAYIDX1]], i64 [[TMP15]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[ARRAYIDX1]], [[TMP102]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST12:%.*]] = phi i8* [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP100:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP100]] to i32 -// CHECK1-NEXT: [[TMP101:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP101]] to i32 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV]], [[CONV13]] -// CHECK1-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK1-NEXT: store i8 [[CONV15]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST12]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST11:%.*]] = phi i8* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP103:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP103]] to i32 +// CHECK1-NEXT: [[TMP104:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV12:%.*]] = sext i8 [[TMP104]] to i32 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV]], [[CONV12]] +// CHECK1-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i8 +// CHECK1-NEXT: store i8 [[CONV14]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP99]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done18: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP94]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP102]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done17: +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP97]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP102:%.*]] = load i32, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP103:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP102]] monotonic, align 4 -// CHECK1-NEXT: [[TMP104:%.*]] = getelementptr i8, i8* [[ARRAYIDX2]], i64 [[TMP12]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq i8* [[ARRAYIDX2]], [[TMP104]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY19]], label [[OMP_ARRAYCPY_DONE32:%.*]], label [[OMP_ARRAYCPY_BODY20:%.*]] -// CHECK1: omp.arraycpy.body20: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST21:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT30:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST22:%.*]] = phi i8* [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT29:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP105:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1 -// CHECK1-NEXT: [[CONV23:%.*]] = sext i8 [[TMP105]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST22]] monotonic, align 1 +// CHECK1-NEXT: [[TMP105:%.*]] = load i32, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP106:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP105]] monotonic, align 4 +// CHECK1-NEXT: [[TMP107:%.*]] = getelementptr i8, i8* [[ARRAYIDX1]], i64 [[TMP15]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY18:%.*]] = icmp eq i8* [[ARRAYIDX1]], [[TMP107]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY18]], label [[OMP_ARRAYCPY_DONE31:%.*]], label [[OMP_ARRAYCPY_BODY19:%.*]] +// CHECK1: omp.arraycpy.body19: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST20:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT29:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST21:%.*]] = phi i8* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT28:%.*]], [[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[TMP108:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 1 +// CHECK1-NEXT: [[CONV22:%.*]] = sext i8 [[TMP108]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST21]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP106:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY20]] ], [ [[TMP111:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP106]], i8* [[_TMP24]], align 1 -// CHECK1-NEXT: [[TMP107:%.*]] = load i8, i8* [[_TMP24]], align 1 -// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP107]] to i32 -// CHECK1-NEXT: [[TMP108:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1 -// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP108]] to i32 -// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[CONV25]], [[CONV26]] -// CHECK1-NEXT: [[CONV28:%.*]] = trunc i32 [[ADD27]] to i8 -// CHECK1-NEXT: store i8 [[CONV28]], i8* [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP109:%.*]] = load i8, i8* [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP110:%.*]] = cmpxchg i8* [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i8 [[TMP106]], i8 [[TMP109]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP111]] = extractvalue { i8, i1 } [[TMP110]], 0 -// CHECK1-NEXT: [[TMP112:%.*]] = extractvalue { i8, i1 } [[TMP110]], 1 -// CHECK1-NEXT: br i1 [[TMP112]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP109:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY19]] ], [ [[TMP114:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP109]], i8* [[_TMP23]], align 1 +// CHECK1-NEXT: [[TMP110:%.*]] = load i8, i8* [[_TMP23]], align 1 +// CHECK1-NEXT: [[CONV24:%.*]] = sext i8 [[TMP110]] to i32 +// CHECK1-NEXT: [[TMP111:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 1 +// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP111]] to i32 +// CHECK1-NEXT: [[ADD26:%.*]] = add nsw i32 [[CONV24]], [[CONV25]] +// CHECK1-NEXT: [[CONV27:%.*]] = trunc i32 [[ADD26]] to i8 +// CHECK1-NEXT: store i8 [[CONV27]], i8* [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP112:%.*]] = load i8, i8* [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP113:%.*]] = cmpxchg i8* [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i8 [[TMP109]], i8 [[TMP112]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP114]] = extractvalue { i8, i1 } [[TMP113]], 0 +// CHECK1-NEXT: [[TMP115:%.*]] = extractvalue { i8, i1 } [[TMP113]], 1 +// CHECK1-NEXT: br i1 [[TMP115]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT29]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT30]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE31:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT29]], [[TMP104]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_DONE32]], label [[OMP_ARRAYCPY_BODY20]] -// CHECK1: omp.arraycpy.done32: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP94]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT28]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT29]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST20]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE30:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT28]], [[TMP107]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE30]], label [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_BODY19]] +// CHECK1: omp.arraycpy.done31: +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP97]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP113:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP113]]) -// CHECK1-NEXT: [[TMP114:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP115:%.*]] = load i32, i32* [[TMP114]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP115]]) +// CHECK1-NEXT: [[TMP116:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP116]]) +// CHECK1-NEXT: [[TMP117:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP118:%.*]] = load i32, i32* [[TMP117]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP118]]) // CHECK1-NEXT: ret void // // @@ -427,7 +433,7 @@ // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[TMP_I:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[TMP4_I:%.*]] = alloca i8*, align 8 @@ -441,7 +447,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -454,29 +460,29 @@ // CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* // CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5:[0-9]+]] // CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* // CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP24:%.*]] = load i8**, i8*** [[TMP23]], align 8 // CHECK1-NEXT: [[TMP25:%.*]] = load i8*, i8** [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP26]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 // CHECK1-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP29]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP31:%.*]] = load i8**, i8*** [[TMP30]], align 8 // CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds i8*, i8** [[TMP31]], i64 9 // CHECK1-NEXT: [[TMP32:%.*]] = load i8*, i8** [[ARRAYIDX2_I]], align 8 @@ -487,10 +493,10 @@ // CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 // CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8 +// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8, !noalias !12 // CHECK1-NEXT: [[TMP39:%.*]] = load i8*, i8** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP40:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP39]], i8* [[TMP25]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP42:%.*]] = load i8**, i8*** [[TMP41]], align 8 // CHECK1-NEXT: [[TMP43:%.*]] = load i8*, i8** [[TMP42]], align 8 // CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint i8* [[TMP43]] to i64 diff --git a/clang/test/OpenMP/single_codegen.cpp b/clang/test/OpenMP/single_codegen.cpp --- a/clang/test/OpenMP/single_codegen.cpp +++ b/clang/test/OpenMP/single_codegen.cpp @@ -506,29 +506,30 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8 // CHECK1-NEXT: [[A2:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], %struct.SST* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store double 0.000000e+00, double* [[A]], align 8 // CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SST]], %struct.SST* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store double* [[A3]], double** [[A2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[A2]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double, double* [[TMP0]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to double* -// CHECK1-NEXT: store double [[TMP1]], double* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SST*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SST* [[THIS1]], i64 [[TMP2]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SST* [[THIS1]], %struct.SST** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[A2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load double, double* [[TMP2]], align 8 +// CHECK1-NEXT: store double [[TMP3]], double* [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SST* noundef [[THIS:%.*]], i64 noundef [[A:%.*]]) #[[ATTR12:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 @@ -536,45 +537,48 @@ // CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to double* -// CHECK1-NEXT: store double* [[CONV]], double** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: store double* [[TMP1]], double** [[_TMP1]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SST*, %struct.SST** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double, double* [[TMP3]], align 8 +// CHECK1-NEXT: store double [[TMP4]], double* [[A]], align 8 +// CHECK1-NEXT: store double* [[A]], double** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[TMP5]], double** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK1-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) +// CHECK1-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK1-NEXT: br i1 [[TMP9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store %struct.SST* [[TMP0]], %struct.SST** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP8:%.*]] = load double*, double** [[_TMP1]], align 8 -// CHECK1-NEXT: store double* [[TMP8]], double** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SST* [[TMP2]], %struct.SST** [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load double*, double** [[_TMP1]], align 8 +// CHECK1-NEXT: store double* [[TMP12]], double** [[TMP11]], align 8 // CHECK1-NEXT: invoke void @_ZZN3SSTIdEC1EvENKUlvE_clEv(%class.anon* noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: -// CHECK1-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load double*, double** [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast double* [[TMP10]] to i8* -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8* -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK1-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i64 8, i8* [[TMP12]], void (i8*, i8*)* @.omp.copyprivate.copy_func.5, i32 [[TMP13]]) +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP14:%.*]] = load double*, double** [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP14]] to i8* +// CHECK1-NEXT: store i8* [[TMP15]], i8** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8* +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK1-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.copyprivate.copy_func.5, i32 [[TMP17]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP14:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP15:%.*]] = extractvalue { i8*, i32 } [[TMP14]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP15]]) #[[ATTR13]] +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR13]] // CHECK1-NEXT: unreachable // // @@ -623,7 +627,7 @@ // CHECK1-SAME: (%class.anon.0* noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR10]] align 2 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon.0*, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store %class.anon.0* [[THIS]], %class.anon.0** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load %class.anon.0*, %class.anon.0** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], %class.anon.0* [[THIS1]], i32 0, i32 0 @@ -633,58 +637,62 @@ // CHECK1-NEXT: [[TMP4:%.*]] = load double, double* [[TMP3]], align 8 // CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00 // CHECK1-NEXT: store double [[INC]], double* [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load double, double* [[TMP6]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to double* -// CHECK1-NEXT: store double [[TMP7]], double* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SST*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.SST* [[TMP1]], i64 [[TMP8]]) +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SST* [[TMP1]], %struct.SST** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load double*, double** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load double, double* [[TMP8]], align 8 +// CHECK1-NEXT: store double [[TMP9]], double* [[TMP6]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SST* noundef [[THIS:%.*]], i64 noundef [[A:%.*]]) #[[ATTR12]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to double* -// CHECK1-NEXT: store double* [[CONV]], double** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: store double* [[TMP1]], double** [[_TMP1]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SST*, %struct.SST** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double, double* [[TMP3]], align 8 +// CHECK1-NEXT: store double [[TMP4]], double* [[A]], align 8 +// CHECK1-NEXT: store double* [[A]], double** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[TMP5]], double** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK1-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) +// CHECK1-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK1-NEXT: br i1 [[TMP9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load double, double* [[TMP6]], align 8 -// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP7]], 1.000000e+00 -// CHECK1-NEXT: store double [[INC]], double* [[TMP6]], align 8 -// CHECK1-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: [[TMP10:%.*]] = load double*, double** [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load double, double* [[TMP10]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// CHECK1-NEXT: store double [[INC]], double* [[TMP10]], align 8 +// CHECK1-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load double*, double** [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast double* [[TMP9]] to i8* -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8* -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK1-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i64 8, i8* [[TMP11]], void (i8*, i8*)* @.omp.copyprivate.copy_func.7, i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load double*, double** [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to i8* +// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8* +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK1-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i64 8, i8* [[TMP15]], void (i8*, i8*)* @.omp.copyprivate.copy_func.7, i32 [[TMP16]]) // CHECK1-NEXT: ret void // // @@ -718,9 +726,7 @@ // CHECK1-NEXT: [[A2:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[C7:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32* [[D]], i32** [[D_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 @@ -745,148 +751,149 @@ // CHECK1-NEXT: [[C8:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 2 // CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[C8]], align 8 // CHECK1-NEXT: store i32* [[TMP1]], i32** [[C7]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[B4]], align 4 -// CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV9]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[C7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: [[CONV10:%.*]] = bitcast i64* [[C_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[CONV10]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[C_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i64, i64, i64)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.SS* [[THIS1]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP9]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[THIS1]], %struct.SS** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A2]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[B4]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[C7]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[TMP8]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[C:%.*]]) #[[ATTR12]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[_TMP3:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_3:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [3 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[C]], i64* [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[C_ADDR]] to i32* -// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP]], align 8 -// CHECK1-NEXT: store i32* [[CONV2]], i32** [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK1-NEXT: store i32* [[TMP1]], i32** [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK1-NEXT: store i32* [[TMP2]], i32** [[_TMP5]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[B]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[C]], align 4 +// CHECK1-NEXT: store i32* [[A]], i32** [[TMP]], align 8 +// CHECK1-NEXT: store i32* [[C]], i32** [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK1-NEXT: store i32* [[TMP9]], i32** [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP1]], align 8 +// CHECK1-NEXT: store i32* [[TMP10]], i32** [[_TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[_TMP4]], align 8 -// CHECK1-NEXT: store i32* [[TMP9]], i32** [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store i32* [[CONV1]], i32** [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[_TMP5]], align 8 -// CHECK1-NEXT: store i32* [[TMP12]], i32** [[TMP11]], align 8 -// CHECK1-NEXT: invoke void @_ZZN2SSC1ERiENKUlvE_clEv(%class.anon.1* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK1-NEXT: store i32* [[TMP17]], i32** [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[B]], i32** [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK1-NEXT: store i32* [[TMP20]], i32** [[TMP19]], align 8 +// CHECK1-NEXT: invoke void @_ZZN2SSC1ERiENKUlvE_clEv(%class.anon.3* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: -// CHECK1-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to i8* -// CHECK1-NEXT: store i8* [[TMP15]], i8** [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i32* [[CONV1]] to i8* -// CHECK1-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i32* [[TMP19]] to i8* -// CHECK1-NEXT: store i8* [[TMP20]], i8** [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = bitcast [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8* -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK1-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i64 24, i8* [[TMP21]], void (i8*, i8*)* @.omp.copyprivate.copy_func.9, i32 [[TMP22]]) +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i32* [[TMP22]] to i8* +// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP25:%.*]] = bitcast i32* [[B]] to i8* +// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = bitcast i32* [[TMP27]] to i8* +// CHECK1-NEXT: store i8* [[TMP28]], i8** [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = bitcast [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8* +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK1-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i64 24, i8* [[TMP29]], void (i8*, i8*)* @.omp.copyprivate.copy_func.9, i32 [[TMP30]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP23:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP31:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP24:%.*]] = extractvalue { i8*, i32 } [[TMP23]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP24]]) #[[ATTR13]] +// CHECK1-NEXT: [[TMP32:%.*]] = extractvalue { i8*, i32 } [[TMP31]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP32]]) #[[ATTR13]] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@_ZZN2SSC1ERiENKUlvE_clEv -// CHECK1-SAME: (%class.anon.1* noundef nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR10]] align 2 { +// CHECK1-SAME: (%class.anon.3* noundef nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR10]] align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon.1*, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: store %class.anon.1* [[THIS]], %class.anon.1** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %class.anon.1*, %class.anon.1** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1:%.*]], %class.anon.1* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon.3*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 +// CHECK1-NEXT: store %class.anon.3* [[THIS]], %class.anon.3** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %class.anon.3*, %class.anon.3** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_3:%.*]], %class.anon.3* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = load %struct.SS*, %struct.SS** [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[THIS1]], i32 0, i32 1 // CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 // CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 // CHECK1-NEXT: store i32 [[INC]], i32* [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[THIS1]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 // CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP7]], -1 // CHECK1-NEXT: store i32 [[DEC]], i32* [[TMP6]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[THIS1]], i32 0, i32 3 // CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[TMP8]], align 8 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1 // CHECK1-NEXT: store i32 [[DIV]], i32* [[TMP9]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP13]], i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP17]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[C_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP21]], i32* [[CONV3]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i64, i64* [[C_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i64, i64, i64)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.SS* [[TMP1]], i64 [[TMP14]], i64 [[TMP18]], i64 [[TMP22]]) +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP1]], %struct.SS** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[THIS1]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[THIS1]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[TMP16]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[THIS1]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32*, i32** [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK1-NEXT: store i32 [[TMP23]], i32* [[TMP20]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // @@ -929,72 +936,77 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[C:%.*]]) #[[ATTR12]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[_TMP3:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [3 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[C]], i64* [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[C_ADDR]] to i32* -// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP]], align 8 -// CHECK1-NEXT: store i32* [[CONV2]], i32** [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK1-NEXT: store i32* [[TMP1]], i32** [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK1-NEXT: store i32* [[TMP2]], i32** [[_TMP5]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[B]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[C]], align 4 +// CHECK1-NEXT: store i32* [[A]], i32** [[TMP]], align 8 +// CHECK1-NEXT: store i32* [[C]], i32** [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK1-NEXT: store i32* [[TMP9]], i32** [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP1]], align 8 +// CHECK1-NEXT: store i32* [[TMP10]], i32** [[_TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP9]], -1 -// CHECK1-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[DIV]], i32* [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP15]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[B]], align 4 +// CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP17]], -1 +// CHECK1-NEXT: store i32 [[DEC]], i32* [[B]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[DIV]], i32* [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8* -// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast i32* [[CONV1]] to i8* -// CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to i8* -// CHECK1-NEXT: store i8* [[TMP19]], i8** [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = bitcast [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8* -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK1-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i64 24, i8* [[TMP20]], void (i8*, i8*)* @.omp.copyprivate.copy_func.11, i32 [[TMP21]]) +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to i8* +// CHECK1-NEXT: store i8* [[TMP22]], i8** [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP24:%.*]] = bitcast i32* [[B]] to i8* +// CHECK1-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to i8* +// CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = bitcast [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8* +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK1-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i64 24, i8* [[TMP28]], void (i8*, i8*)* @.omp.copyprivate.copy_func.11, i32 [[TMP29]]) // CHECK1-NEXT: ret void // // @@ -1039,36 +1051,40 @@ // CHECK1-LABEL: define {{[^@]+}}@_Z15parallel_singlev // CHECK1-SAME: () #[[ATTR10]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..12 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR12]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: // CHECK1-NEXT: invoke void @_Z3foov() // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: -// CHECK1-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP4:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP5:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP5]]) #[[ATTR13]] +// CHECK1-NEXT: [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP6]]) #[[ATTR13]] // CHECK1-NEXT: unreachable // // @@ -1458,36 +1474,40 @@ // CHECK2-LABEL: define {{[^@]+}}@_Z15parallel_singlev // CHECK2-SAME: () #[[ATTR10]] { // CHECK2-NEXT: entry: -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR12:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK2-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK2-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK2-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: // CHECK2-NEXT: invoke void @_Z3foov() // CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK2: invoke.cont: -// CHECK2-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: -// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK2-NEXT: ret void // CHECK2: terminate.lpad: -// CHECK2-NEXT: [[TMP4:%.*]] = landingpad { i8*, i32 } +// CHECK2-NEXT: [[TMP5:%.*]] = landingpad { i8*, i32 } // CHECK2-NEXT: catch i8* null -// CHECK2-NEXT: [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0 -// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP5]]) #[[ATTR13]] +// CHECK2-NEXT: [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 0 +// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP6]]) #[[ATTR13]] // CHECK2-NEXT: unreachable // // @@ -1519,9 +1539,7 @@ // CHECK2-NEXT: [[A2:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[C7:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK2-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK2-NEXT: store i32* [[D]], i32** [[D_ADDR]], align 8 // CHECK2-NEXT: [[THIS1:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 @@ -1546,100 +1564,104 @@ // CHECK2-NEXT: [[C8:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 2 // CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[C8]], align 8 // CHECK2-NEXT: store i32* [[TMP1]], i32** [[C7]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A2]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[B4]], align 4 -// CHECK2-NEXT: [[CONV9:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[CONV9]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[C7]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK2-NEXT: [[CONV10:%.*]] = bitcast i64* [[C_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP8]], i32* [[CONV10]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i64, i64* [[C_CASTED]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i64, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.SS* [[THIS1]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP9]]) +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store %struct.SS* [[THIS1]], %struct.SS** [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A2]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], i32* [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[B4]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32*, i32** [[C7]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], i32* [[TMP8]], align 8 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[C:%.*]]) #[[ATTR12]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[_TMP3:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[_TMP4:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[_TMP5:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 // CHECK2-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [3 x i8*], align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[C]], i64* [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[C_ADDR]] to i32* -// CHECK2-NEXT: store i32* [[CONV]], i32** [[TMP]], align 8 -// CHECK2-NEXT: store i32* [[CONV2]], i32** [[_TMP3]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK2-NEXT: store i32* [[TMP1]], i32** [[_TMP4]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK2-NEXT: store i32* [[TMP2]], i32** [[_TMP5]], align 8 +// CHECK2-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[A]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], i32* [[B]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK2-NEXT: store i32 [[TMP8]], i32* [[C]], align 4 +// CHECK2-NEXT: store i32* [[A]], i32** [[TMP]], align 8 +// CHECK2-NEXT: store i32* [[C]], i32** [[_TMP1]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK2-NEXT: store i32* [[TMP9]], i32** [[_TMP2]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP1]], align 8 +// CHECK2-NEXT: store i32* [[TMP10]], i32** [[_TMP3]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK2-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK2-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK2-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 0 -// CHECK2-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP7]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32*, i32** [[_TMP4]], align 8 -// CHECK2-NEXT: store i32* [[TMP9]], i32** [[TMP8]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 2 -// CHECK2-NEXT: store i32* [[CONV1]], i32** [[TMP10]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 3 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[_TMP5]], align 8 -// CHECK2-NEXT: store i32* [[TMP12]], i32** [[TMP11]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 0 +// CHECK2-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP15]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK2-NEXT: store i32* [[TMP17]], i32** [[TMP16]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 2 +// CHECK2-NEXT: store i32* [[B]], i32** [[TMP18]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK2-NEXT: store i32* [[TMP20]], i32** [[TMP19]], align 8 // CHECK2-NEXT: invoke void @_ZZN2SSC1ERiENKUlvE_clEv(%class.anon* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK2: invoke.cont: -// CHECK2-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32*, i32** [[_TMP4]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to i8* -// CHECK2-NEXT: store i8* [[TMP15]], i8** [[TMP13]], align 8 -// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP17:%.*]] = bitcast i32* [[CONV1]] to i8* -// CHECK2-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8 -// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32*, i32** [[_TMP5]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = bitcast i32* [[TMP19]] to i8* -// CHECK2-NEXT: store i8* [[TMP20]], i8** [[TMP18]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = bitcast [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8* -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK2-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i64 24, i8* [[TMP21]], void (i8*, i8*)* @.omp.copyprivate.copy_func.6, i32 [[TMP22]]) +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = bitcast i32* [[TMP22]] to i8* +// CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP21]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 +// CHECK2-NEXT: [[TMP25:%.*]] = bitcast i32* [[B]] to i8* +// CHECK2-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK2-NEXT: [[TMP28:%.*]] = bitcast i32* [[TMP27]] to i8* +// CHECK2-NEXT: store i8* [[TMP28]], i8** [[TMP26]], align 8 +// CHECK2-NEXT: [[TMP29:%.*]] = bitcast [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8* +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK2-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i64 24, i8* [[TMP29]], void (i8*, i8*)* @.omp.copyprivate.copy_func.6, i32 [[TMP30]]) // CHECK2-NEXT: ret void // CHECK2: terminate.lpad: -// CHECK2-NEXT: [[TMP23:%.*]] = landingpad { i8*, i32 } +// CHECK2-NEXT: [[TMP31:%.*]] = landingpad { i8*, i32 } // CHECK2-NEXT: catch i8* null -// CHECK2-NEXT: [[TMP24:%.*]] = extractvalue { i8*, i32 } [[TMP23]], 0 -// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP24]]) #[[ATTR13]] +// CHECK2-NEXT: [[TMP32:%.*]] = extractvalue { i8*, i32 } [[TMP31]], 0 +// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP32]]) #[[ATTR13]] // CHECK2-NEXT: unreachable // // @@ -1647,9 +1669,7 @@ // CHECK2-SAME: (%class.anon* noundef nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR10]] align 2 { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon*, align 8 -// CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK2-NEXT: store %class.anon* [[THIS]], %class.anon** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[THIS1:%.*]] = load %class.anon*, %class.anon** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON:%.*]], %class.anon* [[THIS1]], i32 0, i32 0 @@ -1669,25 +1689,24 @@ // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1 // CHECK2-NEXT: store i32 [[DIV]], i32* [[TMP9]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[THIS1]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP11]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP13]], i32* [[CONV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[THIS1]], i32 0, i32 2 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32*, i32** [[TMP15]], align 8 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 -// CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP17]], i32* [[CONV2]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[THIS1]], i32 0, i32 3 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP19]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[C_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP21]], i32* [[CONV3]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i64, i64* [[C_CASTED]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i64, i64, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.SS* [[TMP1]], i64 [[TMP14]], i64 [[TMP18]], i64 [[TMP22]]) +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store %struct.SS* [[TMP1]], %struct.SS** [[TMP11]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[THIS1]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32*, i32** [[TMP13]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], i32* [[TMP12]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[THIS1]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK2-NEXT: store i32 [[TMP19]], i32* [[TMP16]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[THIS1]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32*, i32** [[TMP21]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK2-NEXT: store i32 [[TMP23]], i32* [[TMP20]], align 8 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // @@ -1730,72 +1749,77 @@ // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[C:%.*]]) #[[ATTR12]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[_TMP3:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[_TMP4:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[_TMP5:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [3 x i8*], align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[C]], i64* [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[C_ADDR]] to i32* -// CHECK2-NEXT: store i32* [[CONV]], i32** [[TMP]], align 8 -// CHECK2-NEXT: store i32* [[CONV2]], i32** [[_TMP3]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK2-NEXT: store i32* [[TMP1]], i32** [[_TMP4]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK2-NEXT: store i32* [[TMP2]], i32** [[_TMP5]], align 8 +// CHECK2-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[A]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], i32* [[B]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK2-NEXT: store i32 [[TMP8]], i32* [[C]], align 4 +// CHECK2-NEXT: store i32* [[A]], i32** [[TMP]], align 8 +// CHECK2-NEXT: store i32* [[C]], i32** [[_TMP1]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK2-NEXT: store i32* [[TMP9]], i32** [[_TMP2]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP1]], align 8 +// CHECK2-NEXT: store i32* [[TMP10]], i32** [[_TMP3]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK2-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK2-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK2-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[_TMP4]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK2-NEXT: store i32 [[INC]], i32* [[TMP7]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK2-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP9]], -1 -// CHECK2-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP5]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 1 -// CHECK2-NEXT: store i32 [[DIV]], i32* [[TMP10]], align 4 -// CHECK2-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: [[TMP15:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK2-NEXT: store i32 [[INC]], i32* [[TMP15]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[B]], align 4 +// CHECK2-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP17]], -1 +// CHECK2-NEXT: store i32 [[DEC]], i32* [[B]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP19]], 1 +// CHECK2-NEXT: store i32 [[DIV]], i32* [[TMP18]], align 4 +// CHECK2-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: -// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP4]], align 8 -// CHECK2-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8* -// CHECK2-NEXT: store i8* [[TMP14]], i8** [[TMP12]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP16:%.*]] = bitcast i32* [[CONV1]] to i8* -// CHECK2-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 -// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32*, i32** [[_TMP5]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to i8* -// CHECK2-NEXT: store i8* [[TMP19]], i8** [[TMP17]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = bitcast [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8* -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK2-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i64 24, i8* [[TMP20]], void (i8*, i8*)* @.omp.copyprivate.copy_func.8, i32 [[TMP21]]) +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to i8* +// CHECK2-NEXT: store i8* [[TMP22]], i8** [[TMP20]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 +// CHECK2-NEXT: [[TMP24:%.*]] = bitcast i32* [[B]] to i8* +// CHECK2-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to i8* +// CHECK2-NEXT: store i8* [[TMP27]], i8** [[TMP25]], align 8 +// CHECK2-NEXT: [[TMP28:%.*]] = bitcast [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8* +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK2-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i64 24, i8* [[TMP28]], void (i8*, i8*)* @.omp.copyprivate.copy_func.8, i32 [[TMP29]]) // CHECK2-NEXT: ret void // // @@ -1842,94 +1866,98 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8 // CHECK2-NEXT: [[A2:%.*]] = alloca double*, align 8 -// CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK2-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[THIS1:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], %struct.SST* [[THIS1]], i32 0, i32 0 // CHECK2-NEXT: store double 0.000000e+00, double* [[A]], align 8 // CHECK2-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SST]], %struct.SST* [[THIS1]], i32 0, i32 0 // CHECK2-NEXT: store double* [[A3]], double** [[A2]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load double*, double** [[A2]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load double, double* [[TMP0]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to double* -// CHECK2-NEXT: store double [[TMP1]], double* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SST*, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.SST* [[THIS1]], i64 [[TMP2]]) +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store %struct.SST* [[THIS1]], %struct.SST** [[TMP0]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP2:%.*]] = load double*, double** [[A2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load double, double* [[TMP2]], align 8 +// CHECK2-NEXT: store double [[TMP3]], double* [[TMP1]], align 8 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SST* noundef [[THIS:%.*]], i64 noundef [[A:%.*]]) #[[ATTR12]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK2-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca double*, align 8 // CHECK2-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK2-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_4:%.*]], align 8 // CHECK2-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to double* -// CHECK2-NEXT: store double* [[CONV]], double** [[TMP]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK2-NEXT: store double* [[TMP1]], double** [[_TMP1]], align 8 +// CHECK2-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load %struct.SST*, %struct.SST** [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load double, double* [[TMP3]], align 8 +// CHECK2-NEXT: store double [[TMP4]], double* [[A]], align 8 +// CHECK2-NEXT: store double* [[A]], double** [[TMP]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK2-NEXT: store double* [[TMP5]], double** [[_TMP1]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK2-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK2-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK2-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) +// CHECK2-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK2-NEXT: br i1 [[TMP9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK2-NEXT: store %struct.SST* [[TMP0]], %struct.SST** [[TMP6]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP8:%.*]] = load double*, double** [[_TMP1]], align 8 -// CHECK2-NEXT: store double* [[TMP8]], double** [[TMP7]], align 8 -// CHECK2-NEXT: invoke void @_ZZN3SSTIdEC1EvENKUlvE_clEv(%class.anon.0* noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 0 +// CHECK2-NEXT: store %struct.SST* [[TMP2]], %struct.SST** [[TMP10]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP12:%.*]] = load double*, double** [[_TMP1]], align 8 +// CHECK2-NEXT: store double* [[TMP12]], double** [[TMP11]], align 8 +// CHECK2-NEXT: invoke void @_ZZN3SSTIdEC1EvENKUlvE_clEv(%class.anon.4* noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK2: invoke.cont: -// CHECK2-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK2-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP10:%.*]] = load double*, double** [[_TMP1]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = bitcast double* [[TMP10]] to i8* -// CHECK2-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 8 -// CHECK2-NEXT: [[TMP12:%.*]] = bitcast [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8* -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK2-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i64 8, i8* [[TMP12]], void (i8*, i8*)* @.omp.copyprivate.copy_func.10, i32 [[TMP13]]) +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP14:%.*]] = load double*, double** [[_TMP1]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP14]] to i8* +// CHECK2-NEXT: store i8* [[TMP15]], i8** [[TMP13]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8* +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK2-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.copyprivate.copy_func.10, i32 [[TMP17]]) // CHECK2-NEXT: ret void // CHECK2: terminate.lpad: -// CHECK2-NEXT: [[TMP14:%.*]] = landingpad { i8*, i32 } +// CHECK2-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK2-NEXT: catch i8* null -// CHECK2-NEXT: [[TMP15:%.*]] = extractvalue { i8*, i32 } [[TMP14]], 0 -// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP15]]) #[[ATTR13]] +// CHECK2-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR13]] // CHECK2-NEXT: unreachable // // // CHECK2-LABEL: define {{[^@]+}}@_ZZN3SSTIdEC1EvENKUlvE_clEv -// CHECK2-SAME: (%class.anon.0* noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR4]] align 2 { +// CHECK2-SAME: (%class.anon.4* noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR4]] align 2 { // CHECK2-NEXT: entry: -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon.0*, align 8 -// CHECK2-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 -// CHECK2-NEXT: store %class.anon.0* [[THIS]], %class.anon.0** [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[THIS1:%.*]] = load %class.anon.0*, %class.anon.0** [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], %class.anon.0* [[THIS1]], i32 0, i32 0 +// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon.4*, align 8 +// CHECK2-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_5:%.*]], align 8 +// CHECK2-NEXT: store %class.anon.4* [[THIS]], %class.anon.4** [[THIS_ADDR]], align 8 +// CHECK2-NEXT: [[THIS1:%.*]] = load %class.anon.4*, %class.anon.4** [[THIS_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_4:%.*]], %class.anon.4* [[THIS1]], i32 0, i32 0 // CHECK2-NEXT: [[TMP1:%.*]] = load %struct.SST*, %struct.SST** [[TMP0]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 0 // CHECK2-NEXT: store %struct.SST* [[TMP1]], %struct.SST** [[TMP2]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[THIS1]], i32 0, i32 1 // CHECK2-NEXT: [[TMP5:%.*]] = load double*, double** [[TMP4]], align 8 // CHECK2-NEXT: store double* [[TMP5]], double** [[TMP3]], align 8 -// CHECK2-NEXT: call void @_ZZZN3SSTIdEC1EvENKUlvE_clEvENKUlvE_clEv(%class.anon.1* noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) +// CHECK2-NEXT: call void @_ZZZN3SSTIdEC1EvENKUlvE_clEvENKUlvE_clEv(%class.anon.5* noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK2-NEXT: ret void // // @@ -1956,71 +1984,75 @@ // // // CHECK2-LABEL: define {{[^@]+}}@_ZZZN3SSTIdEC1EvENKUlvE_clEvENKUlvE_clEv -// CHECK2-SAME: (%class.anon.1* noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR10]] align 2 { +// CHECK2-SAME: (%class.anon.5* noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR10]] align 2 { // CHECK2-NEXT: entry: -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon.1*, align 8 -// CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: store %class.anon.1* [[THIS]], %class.anon.1** [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[THIS1:%.*]] = load %class.anon.1*, %class.anon.1** [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1:%.*]], %class.anon.1* [[THIS1]], i32 0, i32 0 +// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon.5*, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 +// CHECK2-NEXT: store %class.anon.5* [[THIS]], %class.anon.5** [[THIS_ADDR]], align 8 +// CHECK2-NEXT: [[THIS1:%.*]] = load %class.anon.5*, %class.anon.5** [[THIS_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_5:%.*]], %class.anon.5* [[THIS1]], i32 0, i32 0 // CHECK2-NEXT: [[TMP1:%.*]] = load %struct.SST*, %struct.SST** [[TMP0]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[THIS1]], i32 0, i32 1 // CHECK2-NEXT: [[TMP3:%.*]] = load double*, double** [[TMP2]], align 8 // CHECK2-NEXT: [[TMP4:%.*]] = load double, double* [[TMP3]], align 8 // CHECK2-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00 // CHECK2-NEXT: store double [[INC]], double* [[TMP3]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP6:%.*]] = load double*, double** [[TMP5]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = load double, double* [[TMP6]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to double* -// CHECK2-NEXT: store double [[TMP7]], double* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SST*, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.SST* [[TMP1]], i64 [[TMP8]]) +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store %struct.SST* [[TMP1]], %struct.SST** [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[THIS1]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP8:%.*]] = load double*, double** [[TMP7]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load double, double* [[TMP8]], align 8 +// CHECK2-NEXT: store double [[TMP9]], double* [[TMP6]], align 8 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SST* noundef [[THIS:%.*]], i64 noundef [[A:%.*]]) #[[ATTR12]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 +// CHECK2-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca double*, align 8 // CHECK2-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to double* -// CHECK2-NEXT: store double* [[CONV]], double** [[TMP]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK2-NEXT: store double* [[TMP1]], double** [[_TMP1]], align 8 +// CHECK2-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load %struct.SST*, %struct.SST** [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load double, double* [[TMP3]], align 8 +// CHECK2-NEXT: store double [[TMP4]], double* [[A]], align 8 +// CHECK2-NEXT: store double* [[A]], double** [[TMP]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK2-NEXT: store double* [[TMP5]], double** [[_TMP1]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK2-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK2-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK2-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) +// CHECK2-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK2-NEXT: br i1 [[TMP9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: [[TMP6:%.*]] = load double*, double** [[_TMP1]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = load double, double* [[TMP6]], align 8 -// CHECK2-NEXT: [[INC:%.*]] = fadd double [[TMP7]], 1.000000e+00 -// CHECK2-NEXT: store double [[INC]], double* [[TMP6]], align 8 -// CHECK2-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK2-NEXT: [[TMP10:%.*]] = load double*, double** [[_TMP1]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load double, double* [[TMP10]], align 8 +// CHECK2-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// CHECK2-NEXT: store double [[INC]], double* [[TMP10]], align 8 +// CHECK2-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP9:%.*]] = load double*, double** [[_TMP1]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = bitcast double* [[TMP9]] to i8* -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = bitcast [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8* -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK2-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i64 8, i8* [[TMP11]], void (i8*, i8*)* @.omp.copyprivate.copy_func.12, i32 [[TMP12]]) +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP13:%.*]] = load double*, double** [[_TMP1]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to i8* +// CHECK2-NEXT: store i8* [[TMP14]], i8** [[TMP12]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = bitcast [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8* +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK2-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i64 8, i8* [[TMP15]], void (i8*, i8*)* @.omp.copyprivate.copy_func.12, i32 [[TMP16]]) // CHECK2-NEXT: ret void // // @@ -2420,36 +2452,40 @@ // CHECK4-LABEL: define {{[^@]+}}@_Z15parallel_singlev // CHECK4-SAME: () #[[ATTR10]] { // CHECK4-NEXT: entry: -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR12:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK4-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK4-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK4-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK4: omp_if.then: // CHECK4-NEXT: invoke void @_Z3foov() // CHECK4-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK4: invoke.cont: -// CHECK4-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK4-NEXT: ret void // CHECK4: terminate.lpad: -// CHECK4-NEXT: [[TMP4:%.*]] = landingpad { i8*, i32 } +// CHECK4-NEXT: [[TMP5:%.*]] = landingpad { i8*, i32 } // CHECK4-NEXT: catch i8* null -// CHECK4-NEXT: [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0 -// CHECK4-NEXT: call void @__clang_call_terminate(i8* [[TMP5]]) #[[ATTR13]] +// CHECK4-NEXT: [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 0 +// CHECK4-NEXT: call void @__clang_call_terminate(i8* [[TMP6]]) #[[ATTR13]] // CHECK4-NEXT: unreachable // // @@ -2481,9 +2517,7 @@ // CHECK4-NEXT: [[A2:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[C7:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK4-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK4-NEXT: store i32* [[D]], i32** [[D_ADDR]], align 8 // CHECK4-NEXT: [[THIS1:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 @@ -2508,100 +2542,104 @@ // CHECK4-NEXT: [[C8:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 2 // CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[C8]], align 8 // CHECK4-NEXT: store i32* [[TMP1]], i32** [[C7]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A2]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK4-NEXT: store i32 [[TMP3]], i32* [[CONV]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[B4]], align 4 -// CHECK4-NEXT: [[CONV9:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK4-NEXT: store i32 [[TMP5]], i32* [[CONV9]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32*, i32** [[C7]], align 8 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK4-NEXT: [[CONV10:%.*]] = bitcast i64* [[C_CASTED]] to i32* -// CHECK4-NEXT: store i32 [[TMP8]], i32* [[CONV10]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i64, i64* [[C_CASTED]], align 8 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i64, i64, i64)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.SS* [[THIS1]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP9]]) +// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store %struct.SS* [[THIS1]], %struct.SS** [[TMP2]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A2]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK4-NEXT: store i32 [[TMP5]], i32* [[TMP3]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[B4]], align 4 +// CHECK4-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32*, i32** [[C7]], align 8 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK4-NEXT: store i32 [[TMP10]], i32* [[TMP8]], align 8 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[C:%.*]]) #[[ATTR12]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[_TMP3:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[_TMP4:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[_TMP5:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 // CHECK4-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [3 x i8*], align 8 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK4-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK4-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK4-NEXT: store i64 [[C]], i64* [[C_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK4-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK4-NEXT: [[CONV2:%.*]] = bitcast i64* [[C_ADDR]] to i32* -// CHECK4-NEXT: store i32* [[CONV]], i32** [[TMP]], align 8 -// CHECK4-NEXT: store i32* [[CONV2]], i32** [[_TMP3]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK4-NEXT: store i32* [[TMP1]], i32** [[_TMP4]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK4-NEXT: store i32* [[TMP2]], i32** [[_TMP5]], align 8 +// CHECK4-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK4-NEXT: store i32 [[TMP4]], i32* [[A]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], i32* [[B]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK4-NEXT: store i32 [[TMP8]], i32* [[C]], align 4 +// CHECK4-NEXT: store i32* [[A]], i32** [[TMP]], align 8 +// CHECK4-NEXT: store i32* [[C]], i32** [[_TMP1]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK4-NEXT: store i32* [[TMP9]], i32** [[_TMP2]], align 8 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP1]], align 8 +// CHECK4-NEXT: store i32* [[TMP10]], i32** [[_TMP3]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK4-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK4-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK4-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK4-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 0 -// CHECK4-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP7]], align 8 -// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32*, i32** [[_TMP4]], align 8 -// CHECK4-NEXT: store i32* [[TMP9]], i32** [[TMP8]], align 8 -// CHECK4-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 2 -// CHECK4-NEXT: store i32* [[CONV1]], i32** [[TMP10]], align 8 -// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32*, i32** [[_TMP5]], align 8 -// CHECK4-NEXT: store i32* [[TMP12]], i32** [[TMP11]], align 8 +// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 0 +// CHECK4-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP15]], align 8 +// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK4-NEXT: store i32* [[TMP17]], i32** [[TMP16]], align 8 +// CHECK4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 2 +// CHECK4-NEXT: store i32* [[B]], i32** [[TMP18]], align 8 +// CHECK4-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK4-NEXT: store i32* [[TMP20]], i32** [[TMP19]], align 8 // CHECK4-NEXT: invoke void @_ZZN2SSC1ERiENKUlvE_clEv(%class.anon* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK4-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK4: invoke.cont: -// CHECK4-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK4-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: -// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32*, i32** [[_TMP4]], align 8 -// CHECK4-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to i8* -// CHECK4-NEXT: store i8* [[TMP15]], i8** [[TMP13]], align 8 -// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 -// CHECK4-NEXT: [[TMP17:%.*]] = bitcast i32* [[CONV1]] to i8* -// CHECK4-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8 -// CHECK4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32*, i32** [[_TMP5]], align 8 -// CHECK4-NEXT: [[TMP20:%.*]] = bitcast i32* [[TMP19]] to i8* -// CHECK4-NEXT: store i8* [[TMP20]], i8** [[TMP18]], align 8 -// CHECK4-NEXT: [[TMP21:%.*]] = bitcast [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8* -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK4-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i64 24, i8* [[TMP21]], void (i8*, i8*)* @.omp.copyprivate.copy_func.5, i32 [[TMP22]]) +// CHECK4-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK4-NEXT: [[TMP23:%.*]] = bitcast i32* [[TMP22]] to i8* +// CHECK4-NEXT: store i8* [[TMP23]], i8** [[TMP21]], align 8 +// CHECK4-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 +// CHECK4-NEXT: [[TMP25:%.*]] = bitcast i32* [[B]] to i8* +// CHECK4-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 +// CHECK4-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 +// CHECK4-NEXT: [[TMP27:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK4-NEXT: [[TMP28:%.*]] = bitcast i32* [[TMP27]] to i8* +// CHECK4-NEXT: store i8* [[TMP28]], i8** [[TMP26]], align 8 +// CHECK4-NEXT: [[TMP29:%.*]] = bitcast [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8* +// CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK4-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i64 24, i8* [[TMP29]], void (i8*, i8*)* @.omp.copyprivate.copy_func.5, i32 [[TMP30]]) // CHECK4-NEXT: ret void // CHECK4: terminate.lpad: -// CHECK4-NEXT: [[TMP23:%.*]] = landingpad { i8*, i32 } +// CHECK4-NEXT: [[TMP31:%.*]] = landingpad { i8*, i32 } // CHECK4-NEXT: catch i8* null -// CHECK4-NEXT: [[TMP24:%.*]] = extractvalue { i8*, i32 } [[TMP23]], 0 -// CHECK4-NEXT: call void @__clang_call_terminate(i8* [[TMP24]]) #[[ATTR13]] +// CHECK4-NEXT: [[TMP32:%.*]] = extractvalue { i8*, i32 } [[TMP31]], 0 +// CHECK4-NEXT: call void @__clang_call_terminate(i8* [[TMP32]]) #[[ATTR13]] // CHECK4-NEXT: unreachable // // @@ -2609,9 +2647,7 @@ // CHECK4-SAME: (%class.anon* noundef nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR10]] align 2 { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon*, align 8 -// CHECK4-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK4-NEXT: store %class.anon* [[THIS]], %class.anon** [[THIS_ADDR]], align 8 // CHECK4-NEXT: [[THIS1:%.*]] = load %class.anon*, %class.anon** [[THIS_ADDR]], align 8 // CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON:%.*]], %class.anon* [[THIS1]], i32 0, i32 0 @@ -2631,25 +2667,24 @@ // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1 // CHECK4-NEXT: store i32 [[DIV]], i32* [[TMP9]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[THIS1]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP11]], align 8 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK4-NEXT: store i32 [[TMP13]], i32* [[CONV]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[THIS1]], i32 0, i32 2 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32*, i32** [[TMP15]], align 8 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 -// CHECK4-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK4-NEXT: store i32 [[TMP17]], i32* [[CONV2]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK4-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[THIS1]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP19]], align 8 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK4-NEXT: [[CONV3:%.*]] = bitcast i64* [[C_CASTED]] to i32* -// CHECK4-NEXT: store i32 [[TMP21]], i32* [[CONV3]], align 4 -// CHECK4-NEXT: [[TMP22:%.*]] = load i64, i64* [[C_CASTED]], align 8 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i64, i64, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.SS* [[TMP1]], i64 [[TMP14]], i64 [[TMP18]], i64 [[TMP22]]) +// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store %struct.SS* [[TMP1]], %struct.SS** [[TMP11]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[THIS1]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32*, i32** [[TMP13]], align 8 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK4-NEXT: store i32 [[TMP15]], i32* [[TMP12]], align 8 +// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[THIS1]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK4-NEXT: store i32 [[TMP19]], i32* [[TMP16]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[THIS1]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32*, i32** [[TMP21]], align 8 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK4-NEXT: store i32 [[TMP23]], i32* [[TMP20]], align 8 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // @@ -2692,72 +2727,77 @@ // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[C:%.*]]) #[[ATTR12]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[_TMP3:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[_TMP4:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[_TMP5:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [3 x i8*], align 8 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK4-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK4-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK4-NEXT: store i64 [[C]], i64* [[C_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK4-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK4-NEXT: [[CONV2:%.*]] = bitcast i64* [[C_ADDR]] to i32* -// CHECK4-NEXT: store i32* [[CONV]], i32** [[TMP]], align 8 -// CHECK4-NEXT: store i32* [[CONV2]], i32** [[_TMP3]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK4-NEXT: store i32* [[TMP1]], i32** [[_TMP4]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK4-NEXT: store i32* [[TMP2]], i32** [[_TMP5]], align 8 +// CHECK4-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK4-NEXT: store i32 [[TMP4]], i32* [[A]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], i32* [[B]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK4-NEXT: store i32 [[TMP8]], i32* [[C]], align 4 +// CHECK4-NEXT: store i32* [[A]], i32** [[TMP]], align 8 +// CHECK4-NEXT: store i32* [[C]], i32** [[_TMP1]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK4-NEXT: store i32* [[TMP9]], i32** [[_TMP2]], align 8 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP1]], align 8 +// CHECK4-NEXT: store i32* [[TMP10]], i32** [[_TMP3]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK4-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK4-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK4-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK4-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32*, i32** [[_TMP4]], align 8 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK4-NEXT: store i32 [[INC]], i32* [[TMP7]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP9]], -1 -// CHECK4-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP5]], align 8 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 1 -// CHECK4-NEXT: store i32 [[DIV]], i32* [[TMP10]], align 4 -// CHECK4-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK4-NEXT: [[TMP15:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK4-NEXT: store i32 [[INC]], i32* [[TMP15]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[B]], align 4 +// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP17]], -1 +// CHECK4-NEXT: store i32 [[DEC]], i32* [[B]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP19]], 1 +// CHECK4-NEXT: store i32 [[DIV]], i32* [[TMP18]], align 4 +// CHECK4-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: -// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP4]], align 8 -// CHECK4-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8* -// CHECK4-NEXT: store i8* [[TMP14]], i8** [[TMP12]], align 8 -// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 -// CHECK4-NEXT: [[TMP16:%.*]] = bitcast i32* [[CONV1]] to i8* -// CHECK4-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 -// CHECK4-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32*, i32** [[_TMP5]], align 8 -// CHECK4-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to i8* -// CHECK4-NEXT: store i8* [[TMP19]], i8** [[TMP17]], align 8 -// CHECK4-NEXT: [[TMP20:%.*]] = bitcast [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8* -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK4-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i64 24, i8* [[TMP20]], void (i8*, i8*)* @.omp.copyprivate.copy_func.7, i32 [[TMP21]]) +// CHECK4-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK4-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to i8* +// CHECK4-NEXT: store i8* [[TMP22]], i8** [[TMP20]], align 8 +// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 +// CHECK4-NEXT: [[TMP24:%.*]] = bitcast i32* [[B]] to i8* +// CHECK4-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 +// CHECK4-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK4-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to i8* +// CHECK4-NEXT: store i8* [[TMP27]], i8** [[TMP25]], align 8 +// CHECK4-NEXT: [[TMP28:%.*]] = bitcast [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8* +// CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK4-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i64 24, i8* [[TMP28]], void (i8*, i8*)* @.omp.copyprivate.copy_func.7, i32 [[TMP29]]) // CHECK4-NEXT: ret void // // @@ -2804,94 +2844,98 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8 // CHECK4-NEXT: [[A2:%.*]] = alloca double*, align 8 -// CHECK4-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK4-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8 // CHECK4-NEXT: [[THIS1:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], %struct.SST* [[THIS1]], i32 0, i32 0 // CHECK4-NEXT: store double 0.000000e+00, double* [[A]], align 8 // CHECK4-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SST]], %struct.SST* [[THIS1]], i32 0, i32 0 // CHECK4-NEXT: store double* [[A3]], double** [[A2]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load double*, double** [[A2]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load double, double* [[TMP0]], align 8 -// CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to double* -// CHECK4-NEXT: store double [[TMP1]], double* [[CONV]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SST*, i64)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.SST* [[THIS1]], i64 [[TMP2]]) +// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store %struct.SST* [[THIS1]], %struct.SST** [[TMP0]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP2:%.*]] = load double*, double** [[A2]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = load double, double* [[TMP2]], align 8 +// CHECK4-NEXT: store double [[TMP3]], double* [[TMP1]], align 8 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SST* noundef [[THIS:%.*]], i64 noundef [[A:%.*]]) #[[ATTR12]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK4-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca double*, align 8 // CHECK4-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK4-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_4:%.*]], align 8 // CHECK4-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8 -// CHECK4-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to double* -// CHECK4-NEXT: store double* [[CONV]], double** [[TMP]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK4-NEXT: store double* [[TMP1]], double** [[_TMP1]], align 8 +// CHECK4-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load %struct.SST*, %struct.SST** [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load double, double* [[TMP3]], align 8 +// CHECK4-NEXT: store double [[TMP4]], double* [[A]], align 8 +// CHECK4-NEXT: store double* [[A]], double** [[TMP]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK4-NEXT: store double* [[TMP5]], double** [[_TMP1]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK4-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK4-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK4-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) +// CHECK4-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK4-NEXT: br i1 [[TMP9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK4-NEXT: store %struct.SST* [[TMP0]], %struct.SST** [[TMP6]], align 8 -// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP8:%.*]] = load double*, double** [[_TMP1]], align 8 -// CHECK4-NEXT: store double* [[TMP8]], double** [[TMP7]], align 8 -// CHECK4-NEXT: invoke void @_ZZN3SSTIdEC1EvENKUlvE_clEv(%class.anon.0* noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) +// CHECK4-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 0 +// CHECK4-NEXT: store %struct.SST* [[TMP2]], %struct.SST** [[TMP10]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP12:%.*]] = load double*, double** [[_TMP1]], align 8 +// CHECK4-NEXT: store double* [[TMP12]], double** [[TMP11]], align 8 +// CHECK4-NEXT: invoke void @_ZZN3SSTIdEC1EvENKUlvE_clEv(%class.anon.4* noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK4-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK4: invoke.cont: -// CHECK4-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK4-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: -// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK4-NEXT: [[TMP10:%.*]] = load double*, double** [[_TMP1]], align 8 -// CHECK4-NEXT: [[TMP11:%.*]] = bitcast double* [[TMP10]] to i8* -// CHECK4-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 8 -// CHECK4-NEXT: [[TMP12:%.*]] = bitcast [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8* -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK4-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i64 8, i8* [[TMP12]], void (i8*, i8*)* @.omp.copyprivate.copy_func.9, i32 [[TMP13]]) +// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK4-NEXT: [[TMP14:%.*]] = load double*, double** [[_TMP1]], align 8 +// CHECK4-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP14]] to i8* +// CHECK4-NEXT: store i8* [[TMP15]], i8** [[TMP13]], align 8 +// CHECK4-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8* +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK4-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.copyprivate.copy_func.9, i32 [[TMP17]]) // CHECK4-NEXT: ret void // CHECK4: terminate.lpad: -// CHECK4-NEXT: [[TMP14:%.*]] = landingpad { i8*, i32 } +// CHECK4-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK4-NEXT: catch i8* null -// CHECK4-NEXT: [[TMP15:%.*]] = extractvalue { i8*, i32 } [[TMP14]], 0 -// CHECK4-NEXT: call void @__clang_call_terminate(i8* [[TMP15]]) #[[ATTR13]] +// CHECK4-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK4-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR13]] // CHECK4-NEXT: unreachable // // // CHECK4-LABEL: define {{[^@]+}}@_ZZN3SSTIdEC1EvENKUlvE_clEv -// CHECK4-SAME: (%class.anon.0* noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR4]] align 2 { +// CHECK4-SAME: (%class.anon.4* noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR4]] align 2 { // CHECK4-NEXT: entry: -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon.0*, align 8 -// CHECK4-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 -// CHECK4-NEXT: store %class.anon.0* [[THIS]], %class.anon.0** [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[THIS1:%.*]] = load %class.anon.0*, %class.anon.0** [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], %class.anon.0* [[THIS1]], i32 0, i32 0 +// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon.4*, align 8 +// CHECK4-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_5:%.*]], align 8 +// CHECK4-NEXT: store %class.anon.4* [[THIS]], %class.anon.4** [[THIS_ADDR]], align 8 +// CHECK4-NEXT: [[THIS1:%.*]] = load %class.anon.4*, %class.anon.4** [[THIS_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_4:%.*]], %class.anon.4* [[THIS1]], i32 0, i32 0 // CHECK4-NEXT: [[TMP1:%.*]] = load %struct.SST*, %struct.SST** [[TMP0]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 0 // CHECK4-NEXT: store %struct.SST* [[TMP1]], %struct.SST** [[TMP2]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[THIS1]], i32 0, i32 1 // CHECK4-NEXT: [[TMP5:%.*]] = load double*, double** [[TMP4]], align 8 // CHECK4-NEXT: store double* [[TMP5]], double** [[TMP3]], align 8 -// CHECK4-NEXT: call void @_ZZZN3SSTIdEC1EvENKUlvE_clEvENKUlvE_clEv(%class.anon.1* noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) +// CHECK4-NEXT: call void @_ZZZN3SSTIdEC1EvENKUlvE_clEvENKUlvE_clEv(%class.anon.5* noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK4-NEXT: ret void // // @@ -2918,71 +2962,75 @@ // // // CHECK4-LABEL: define {{[^@]+}}@_ZZZN3SSTIdEC1EvENKUlvE_clEvENKUlvE_clEv -// CHECK4-SAME: (%class.anon.1* noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR10]] align 2 { +// CHECK4-SAME: (%class.anon.5* noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR10]] align 2 { // CHECK4-NEXT: entry: -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon.1*, align 8 -// CHECK4-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: store %class.anon.1* [[THIS]], %class.anon.1** [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[THIS1:%.*]] = load %class.anon.1*, %class.anon.1** [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1:%.*]], %class.anon.1* [[THIS1]], i32 0, i32 0 +// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon.5*, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 +// CHECK4-NEXT: store %class.anon.5* [[THIS]], %class.anon.5** [[THIS_ADDR]], align 8 +// CHECK4-NEXT: [[THIS1:%.*]] = load %class.anon.5*, %class.anon.5** [[THIS_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_5:%.*]], %class.anon.5* [[THIS1]], i32 0, i32 0 // CHECK4-NEXT: [[TMP1:%.*]] = load %struct.SST*, %struct.SST** [[TMP0]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[THIS1]], i32 0, i32 1 // CHECK4-NEXT: [[TMP3:%.*]] = load double*, double** [[TMP2]], align 8 // CHECK4-NEXT: [[TMP4:%.*]] = load double, double* [[TMP3]], align 8 // CHECK4-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00 // CHECK4-NEXT: store double [[INC]], double* [[TMP3]], align 8 -// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP6:%.*]] = load double*, double** [[TMP5]], align 8 -// CHECK4-NEXT: [[TMP7:%.*]] = load double, double* [[TMP6]], align 8 -// CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to double* -// CHECK4-NEXT: store double [[TMP7]], double* [[CONV]], align 8 -// CHECK4-NEXT: [[TMP8:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SST*, i64)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.SST* [[TMP1]], i64 [[TMP8]]) +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store %struct.SST* [[TMP1]], %struct.SST** [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[THIS1]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP8:%.*]] = load double*, double** [[TMP7]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = load double, double* [[TMP8]], align 8 +// CHECK4-NEXT: store double [[TMP9]], double* [[TMP6]], align 8 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SST* noundef [[THIS:%.*]], i64 noundef [[A:%.*]]) #[[ATTR12]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 +// CHECK4-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca double*, align 8 // CHECK4-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8 -// CHECK4-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to double* -// CHECK4-NEXT: store double* [[CONV]], double** [[TMP]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK4-NEXT: store double* [[TMP1]], double** [[_TMP1]], align 8 +// CHECK4-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load %struct.SST*, %struct.SST** [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load double, double* [[TMP3]], align 8 +// CHECK4-NEXT: store double [[TMP4]], double* [[A]], align 8 +// CHECK4-NEXT: store double* [[A]], double** [[TMP]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK4-NEXT: store double* [[TMP5]], double** [[_TMP1]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK4-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK4-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK4-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) +// CHECK4-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK4-NEXT: br i1 [[TMP9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: [[TMP6:%.*]] = load double*, double** [[_TMP1]], align 8 -// CHECK4-NEXT: [[TMP7:%.*]] = load double, double* [[TMP6]], align 8 -// CHECK4-NEXT: [[INC:%.*]] = fadd double [[TMP7]], 1.000000e+00 -// CHECK4-NEXT: store double [[INC]], double* [[TMP6]], align 8 -// CHECK4-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK4-NEXT: [[TMP10:%.*]] = load double*, double** [[_TMP1]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = load double, double* [[TMP10]], align 8 +// CHECK4-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// CHECK4-NEXT: store double [[INC]], double* [[TMP10]], align 8 +// CHECK4-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: -// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK4-NEXT: [[TMP9:%.*]] = load double*, double** [[_TMP1]], align 8 -// CHECK4-NEXT: [[TMP10:%.*]] = bitcast double* [[TMP9]] to i8* -// CHECK4-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 8 -// CHECK4-NEXT: [[TMP11:%.*]] = bitcast [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8* -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK4-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i64 8, i8* [[TMP11]], void (i8*, i8*)* @.omp.copyprivate.copy_func.11, i32 [[TMP12]]) +// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK4-NEXT: [[TMP13:%.*]] = load double*, double** [[_TMP1]], align 8 +// CHECK4-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to i8* +// CHECK4-NEXT: store i8* [[TMP14]], i8** [[TMP12]], align 8 +// CHECK4-NEXT: [[TMP15:%.*]] = bitcast [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8* +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK4-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i64 8, i8* [[TMP15]], void (i8*, i8*)* @.omp.copyprivate.copy_func.11, i32 [[TMP16]]) // CHECK4-NEXT: ret void // // @@ -3414,29 +3462,30 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8 // CHECK5-NEXT: [[A2:%.*]] = alloca double*, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK5-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[THIS1:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], %struct.SST* [[THIS1]], i32 0, i32 0, !dbg [[DBG83:![0-9]+]] // CHECK5-NEXT: store double 0.000000e+00, double* [[A]], align 8, !dbg [[DBG83]] // CHECK5-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SST]], %struct.SST* [[THIS1]], i32 0, i32 0, !dbg [[DBG84:![0-9]+]] // CHECK5-NEXT: store double* [[A3]], double** [[A2]], align 8, !dbg [[DBG84]] -// CHECK5-NEXT: [[TMP0:%.*]] = load double*, double** [[A2]], align 8, !dbg [[DBG85:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load double, double* [[TMP0]], align 8, !dbg [[DBG86:![0-9]+]] -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to double*, !dbg [[DBG86]] -// CHECK5-NEXT: store double [[TMP1]], double* [[CONV]], align 8, !dbg [[DBG86]] -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8, !dbg [[DBG86]] -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB18:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SST*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SST* [[THIS1]], i64 [[TMP2]]), !dbg [[DBG86]] +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG85:![0-9]+]] +// CHECK5-NEXT: store %struct.SST* [[THIS1]], %struct.SST** [[TMP0]], align 8, !dbg [[DBG85]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG85]] +// CHECK5-NEXT: [[TMP2:%.*]] = load double*, double** [[A2]], align 8, !dbg [[DBG86:![0-9]+]] +// CHECK5-NEXT: [[TMP3:%.*]] = load double, double* [[TMP2]], align 8, !dbg [[DBG86]] +// CHECK5-NEXT: store double [[TMP3]], double* [[TMP1]], align 8, !dbg [[DBG85]] +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB18:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG85]] // CHECK5-NEXT: ret void, !dbg [[DBG87:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SST* noundef [[THIS:%.*]], i64 noundef [[A:%.*]]) #[[ATTR12:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG88:![0-9]+]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG88:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca double*, align 8 // CHECK5-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 @@ -3444,45 +3493,48 @@ // CHECK5-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8, !dbg [[DBG89:![0-9]+]] -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to double*, !dbg [[DBG89]] -// CHECK5-NEXT: store double* [[CONV]], double** [[TMP]], align 8, !dbg [[DBG89]] -// CHECK5-NEXT: [[TMP1:%.*]] = load double*, double** [[TMP]], align 8, !dbg [[DBG90:![0-9]+]] -// CHECK5-NEXT: store double* [[TMP1]], double** [[_TMP1]], align 8, !dbg [[DBG91:![0-9]+]] +// CHECK5-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8, !dbg [[DBG89:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0, !dbg [[DBG89]] +// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.SST*, %struct.SST** [[TMP1]], align 8, !dbg [[DBG89]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1, !dbg [[DBG89]] +// CHECK5-NEXT: [[TMP4:%.*]] = load double, double* [[TMP3]], align 8, !dbg [[DBG89]] +// CHECK5-NEXT: store double [[TMP4]], double* [[A]], align 8, !dbg [[DBG89]] +// CHECK5-NEXT: store double* [[A]], double** [[TMP]], align 8, !dbg [[DBG89]] +// CHECK5-NEXT: [[TMP5:%.*]] = load double*, double** [[TMP]], align 8, !dbg [[DBG90:![0-9]+]] +// CHECK5-NEXT: store double* [[TMP5]], double** [[_TMP1]], align 8, !dbg [[DBG91:![0-9]+]] // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG91]] -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG91]] -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4, !dbg [[DBG91]] -// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB16:[0-9]+]], i32 [[TMP3]]), !dbg [[DBG91]] -// CHECK5-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0, !dbg [[DBG91]] -// CHECK5-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG91]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG91]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4, !dbg [[DBG91]] +// CHECK5-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB16:[0-9]+]], i32 [[TMP7]]), !dbg [[DBG91]] +// CHECK5-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0, !dbg [[DBG91]] +// CHECK5-NEXT: br i1 [[TMP9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG91]] // CHECK5: omp_if.then: -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 0, !dbg [[DBG92:![0-9]+]] -// CHECK5-NEXT: store %struct.SST* [[TMP0]], %struct.SST** [[TMP6]], align 8, !dbg [[DBG92]] -// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 1, !dbg [[DBG92]] -// CHECK5-NEXT: [[TMP8:%.*]] = load double*, double** [[_TMP1]], align 8, !dbg [[DBG93:![0-9]+]] -// CHECK5-NEXT: store double* [[TMP8]], double** [[TMP7]], align 8, !dbg [[DBG92]] +// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 0, !dbg [[DBG92:![0-9]+]] +// CHECK5-NEXT: store %struct.SST* [[TMP2]], %struct.SST** [[TMP10]], align 8, !dbg [[DBG92]] +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 1, !dbg [[DBG92]] +// CHECK5-NEXT: [[TMP12:%.*]] = load double*, double** [[_TMP1]], align 8, !dbg [[DBG93:![0-9]+]] +// CHECK5-NEXT: store double* [[TMP12]], double** [[TMP11]], align 8, !dbg [[DBG92]] // CHECK5-NEXT: invoke void @_ZZN3SSTIdEC1EvENKUlvE_clEv(%class.anon* noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG92]] // CHECK5: invoke.cont: -// CHECK5-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB16]], i32 [[TMP3]]), !dbg [[DBG92]] +// CHECK5-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB16]], i32 [[TMP7]]), !dbg [[DBG92]] // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG92]] // CHECK5-NEXT: br label [[OMP_IF_END]], !dbg [[DBG92]] // CHECK5: omp_if.end: -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0, !dbg [[DBG92]] -// CHECK5-NEXT: [[TMP10:%.*]] = load double*, double** [[_TMP1]], align 8, !dbg [[DBG94:![0-9]+]] -// CHECK5-NEXT: [[TMP11:%.*]] = bitcast double* [[TMP10]] to i8*, !dbg [[DBG92]] -// CHECK5-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 8, !dbg [[DBG92]] -// CHECK5-NEXT: [[TMP12:%.*]] = bitcast [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*, !dbg [[DBG92]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG92]] -// CHECK5-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB16]], i32 [[TMP3]], i64 8, i8* [[TMP12]], void (i8*, i8*)* @.omp.copyprivate.copy_func.5, i32 [[TMP13]]), !dbg [[DBG92]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0, !dbg [[DBG92]] +// CHECK5-NEXT: [[TMP14:%.*]] = load double*, double** [[_TMP1]], align 8, !dbg [[DBG94:![0-9]+]] +// CHECK5-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP14]] to i8*, !dbg [[DBG92]] +// CHECK5-NEXT: store i8* [[TMP15]], i8** [[TMP13]], align 8, !dbg [[DBG92]] +// CHECK5-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*, !dbg [[DBG92]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG92]] +// CHECK5-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB16]], i32 [[TMP7]], i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.copyprivate.copy_func.5, i32 [[TMP17]]), !dbg [[DBG92]] // CHECK5-NEXT: ret void, !dbg [[DBG95:![0-9]+]] // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP14:%.*]] = landingpad { i8*, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null, !dbg [[DBG92]] -// CHECK5-NEXT: [[TMP15:%.*]] = extractvalue { i8*, i32 } [[TMP14]], 0, !dbg [[DBG92]] -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP15]]) #[[ATTR13]], !dbg [[DBG92]] +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0, !dbg [[DBG92]] +// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR13]], !dbg [[DBG92]] // CHECK5-NEXT: unreachable, !dbg [[DBG92]] // // @@ -3531,7 +3583,7 @@ // CHECK5-SAME: (%class.anon.0* noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR10]] align 2 !dbg [[DBG105:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon.0*, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK5-NEXT: store %class.anon.0* [[THIS]], %class.anon.0** [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[THIS1:%.*]] = load %class.anon.0*, %class.anon.0** [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], %class.anon.0* [[THIS1]], i32 0, i32 0 @@ -3541,58 +3593,62 @@ // CHECK5-NEXT: [[TMP4:%.*]] = load double, double* [[TMP3]], align 8, !dbg [[DBG107:![0-9]+]] // CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00, !dbg [[DBG107]] // CHECK5-NEXT: store double [[INC]], double* [[TMP3]], align 8, !dbg [[DBG107]] -// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 1, !dbg [[DBG108:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load double*, double** [[TMP5]], align 8, !dbg [[DBG108]] -// CHECK5-NEXT: [[TMP7:%.*]] = load double, double* [[TMP6]], align 8, !dbg [[DBG109:![0-9]+]] -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to double*, !dbg [[DBG109]] -// CHECK5-NEXT: store double [[TMP7]], double* [[CONV]], align 8, !dbg [[DBG109]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[A_CASTED]], align 8, !dbg [[DBG109]] -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB22:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SST*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.SST* [[TMP1]], i64 [[TMP8]]), !dbg [[DBG109]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG108:![0-9]+]] +// CHECK5-NEXT: store %struct.SST* [[TMP1]], %struct.SST** [[TMP5]], align 8, !dbg [[DBG108]] +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG108]] +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[THIS1]], i32 0, i32 1, !dbg [[DBG109:![0-9]+]] +// CHECK5-NEXT: [[TMP8:%.*]] = load double*, double** [[TMP7]], align 8, !dbg [[DBG109]] +// CHECK5-NEXT: [[TMP9:%.*]] = load double, double* [[TMP8]], align 8, !dbg [[DBG109]] +// CHECK5-NEXT: store double [[TMP9]], double* [[TMP6]], align 8, !dbg [[DBG108]] +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB22:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG108]] // CHECK5-NEXT: ret void, !dbg [[DBG110:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SST* noundef [[THIS:%.*]], i64 noundef [[A:%.*]]) #[[ATTR12]] !dbg [[DBG111:![0-9]+]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] !dbg [[DBG111:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca double*, align 8 // CHECK5-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8, !dbg [[DBG112:![0-9]+]] -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to double*, !dbg [[DBG112]] -// CHECK5-NEXT: store double* [[CONV]], double** [[TMP]], align 8, !dbg [[DBG112]] -// CHECK5-NEXT: [[TMP1:%.*]] = load double*, double** [[TMP]], align 8, !dbg [[DBG113:![0-9]+]] -// CHECK5-NEXT: store double* [[TMP1]], double** [[_TMP1]], align 8, !dbg [[DBG114:![0-9]+]] +// CHECK5-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8, !dbg [[DBG112:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0, !dbg [[DBG112]] +// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.SST*, %struct.SST** [[TMP1]], align 8, !dbg [[DBG112]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1, !dbg [[DBG112]] +// CHECK5-NEXT: [[TMP4:%.*]] = load double, double* [[TMP3]], align 8, !dbg [[DBG112]] +// CHECK5-NEXT: store double [[TMP4]], double* [[A]], align 8, !dbg [[DBG112]] +// CHECK5-NEXT: store double* [[A]], double** [[TMP]], align 8, !dbg [[DBG112]] +// CHECK5-NEXT: [[TMP5:%.*]] = load double*, double** [[TMP]], align 8, !dbg [[DBG113:![0-9]+]] +// CHECK5-NEXT: store double* [[TMP5]], double** [[_TMP1]], align 8, !dbg [[DBG114:![0-9]+]] // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG114]] -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG114]] -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4, !dbg [[DBG114]] -// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB20:[0-9]+]], i32 [[TMP3]]), !dbg [[DBG114]] -// CHECK5-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0, !dbg [[DBG114]] -// CHECK5-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG114]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG114]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4, !dbg [[DBG114]] +// CHECK5-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB20:[0-9]+]], i32 [[TMP7]]), !dbg [[DBG114]] +// CHECK5-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0, !dbg [[DBG114]] +// CHECK5-NEXT: br i1 [[TMP9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG114]] // CHECK5: omp_if.then: -// CHECK5-NEXT: [[TMP6:%.*]] = load double*, double** [[_TMP1]], align 8, !dbg [[DBG113]] -// CHECK5-NEXT: [[TMP7:%.*]] = load double, double* [[TMP6]], align 8, !dbg [[DBG115:![0-9]+]] -// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP7]], 1.000000e+00, !dbg [[DBG115]] -// CHECK5-NEXT: store double [[INC]], double* [[TMP6]], align 8, !dbg [[DBG115]] -// CHECK5-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB20]], i32 [[TMP3]]), !dbg [[DBG115]] +// CHECK5-NEXT: [[TMP10:%.*]] = load double*, double** [[_TMP1]], align 8, !dbg [[DBG113]] +// CHECK5-NEXT: [[TMP11:%.*]] = load double, double* [[TMP10]], align 8, !dbg [[DBG115:![0-9]+]] +// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00, !dbg [[DBG115]] +// CHECK5-NEXT: store double [[INC]], double* [[TMP10]], align 8, !dbg [[DBG115]] +// CHECK5-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB20]], i32 [[TMP7]]), !dbg [[DBG115]] // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG115]] // CHECK5-NEXT: br label [[OMP_IF_END]], !dbg [[DBG115]] // CHECK5: omp_if.end: -// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0, !dbg [[DBG115]] -// CHECK5-NEXT: [[TMP9:%.*]] = load double*, double** [[_TMP1]], align 8, !dbg [[DBG116:![0-9]+]] -// CHECK5-NEXT: [[TMP10:%.*]] = bitcast double* [[TMP9]] to i8*, !dbg [[DBG115]] -// CHECK5-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 8, !dbg [[DBG115]] -// CHECK5-NEXT: [[TMP11:%.*]] = bitcast [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*, !dbg [[DBG115]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG115]] -// CHECK5-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB20]], i32 [[TMP3]], i64 8, i8* [[TMP11]], void (i8*, i8*)* @.omp.copyprivate.copy_func.7, i32 [[TMP12]]), !dbg [[DBG115]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0, !dbg [[DBG115]] +// CHECK5-NEXT: [[TMP13:%.*]] = load double*, double** [[_TMP1]], align 8, !dbg [[DBG116:![0-9]+]] +// CHECK5-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to i8*, !dbg [[DBG115]] +// CHECK5-NEXT: store i8* [[TMP14]], i8** [[TMP12]], align 8, !dbg [[DBG115]] +// CHECK5-NEXT: [[TMP15:%.*]] = bitcast [1 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*, !dbg [[DBG115]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG115]] +// CHECK5-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB20]], i32 [[TMP7]], i64 8, i8* [[TMP15]], void (i8*, i8*)* @.omp.copyprivate.copy_func.7, i32 [[TMP16]]), !dbg [[DBG115]] // CHECK5-NEXT: ret void, !dbg [[DBG117:![0-9]+]] // // @@ -3626,9 +3682,7 @@ // CHECK5-NEXT: [[A2:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[C7:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK5-NEXT: store i32* [[D]], i32** [[D_ADDR]], align 8 // CHECK5-NEXT: [[THIS1:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 @@ -3653,340 +3707,350 @@ // CHECK5-NEXT: [[C8:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 2, !dbg [[DBG128:![0-9]+]] // CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[C8]], align 8, !dbg [[DBG128]] // CHECK5-NEXT: store i32* [[TMP1]], i32** [[C7]], align 8, !dbg [[DBG128]] -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A2]], align 8, !dbg [[DBG129:![0-9]+]] -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4, !dbg [[DBG130:![0-9]+]] -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*, !dbg [[DBG130]] -// CHECK5-NEXT: store i32 [[TMP3]], i32* [[CONV]], align 4, !dbg [[DBG130]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8, !dbg [[DBG130]] -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[B4]], align 4, !dbg [[DBG130]] -// CHECK5-NEXT: [[CONV9:%.*]] = bitcast i64* [[B_CASTED]] to i32*, !dbg [[DBG130]] -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[CONV9]], align 4, !dbg [[DBG130]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8, !dbg [[DBG130]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[C7]], align 8, !dbg [[DBG131:![0-9]+]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4, !dbg [[DBG130]] -// CHECK5-NEXT: [[CONV10:%.*]] = bitcast i64* [[C_CASTED]] to i32*, !dbg [[DBG130]] -// CHECK5-NEXT: store i32 [[TMP8]], i32* [[CONV10]], align 4, !dbg [[DBG130]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i64, i64* [[C_CASTED]], align 8, !dbg [[DBG130]] -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB26:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i64, i64, i64)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.SS* [[THIS1]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP9]]), !dbg [[DBG130]] -// CHECK5-NEXT: ret void, !dbg [[DBG132:![0-9]+]] +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG129:![0-9]+]] +// CHECK5-NEXT: store %struct.SS* [[THIS1]], %struct.SS** [[TMP2]], align 8, !dbg [[DBG129]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG129]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A2]], align 8, !dbg [[DBG130:![0-9]+]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4, !dbg [[DBG130]] +// CHECK5-NEXT: store i32 [[TMP5]], i32* [[TMP3]], align 8, !dbg [[DBG129]] +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG129]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[B4]], align 4, !dbg [[DBG131:![0-9]+]] +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4, !dbg [[DBG129]] +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG129]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[C7]], align 8, !dbg [[DBG132:![0-9]+]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4, !dbg [[DBG132]] +// CHECK5-NEXT: store i32 [[TMP10]], i32* [[TMP8]], align 8, !dbg [[DBG129]] +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB26:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG129]] +// CHECK5-NEXT: ret void, !dbg [[DBG133:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[C:%.*]]) #[[ATTR12]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG133:![0-9]+]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG134:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[_TMP3:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[_TMP4:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[_TMP5:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 +// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_3:%.*]], align 8 // CHECK5-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [3 x i8*], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[C]], i64* [[C_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8, !dbg [[DBG134:![0-9]+]] -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*, !dbg [[DBG134]] -// CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32*, !dbg [[DBG134]] -// CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[C_ADDR]] to i32*, !dbg [[DBG134]] -// CHECK5-NEXT: store i32* [[CONV]], i32** [[TMP]], align 8, !dbg [[DBG134]] -// CHECK5-NEXT: store i32* [[CONV2]], i32** [[_TMP3]], align 8, !dbg [[DBG134]] -// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP]], align 8, !dbg [[DBG135:![0-9]+]] -// CHECK5-NEXT: store i32* [[TMP1]], i32** [[_TMP4]], align 8, !dbg [[DBG136:![0-9]+]] -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[_TMP3]], align 8, !dbg [[DBG137:![0-9]+]] -// CHECK5-NEXT: store i32* [[TMP2]], i32** [[_TMP5]], align 8, !dbg [[DBG136]] -// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG136]] -// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG136]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4, !dbg [[DBG136]] -// CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB24:[0-9]+]], i32 [[TMP4]]), !dbg [[DBG136]] -// CHECK5-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0, !dbg [[DBG136]] -// CHECK5-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG136]] +// CHECK5-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8, !dbg [[DBG135:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0, !dbg [[DBG135]] +// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8, !dbg [[DBG135]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1, !dbg [[DBG135]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8, !dbg [[DBG135]] +// CHECK5-NEXT: store i32 [[TMP4]], i32* [[A]], align 4, !dbg [[DBG135]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2, !dbg [[DBG135]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4, !dbg [[DBG135]] +// CHECK5-NEXT: store i32 [[TMP6]], i32* [[B]], align 4, !dbg [[DBG135]] +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3, !dbg [[DBG135]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8, !dbg [[DBG135]] +// CHECK5-NEXT: store i32 [[TMP8]], i32* [[C]], align 4, !dbg [[DBG135]] +// CHECK5-NEXT: store i32* [[A]], i32** [[TMP]], align 8, !dbg [[DBG135]] +// CHECK5-NEXT: store i32* [[C]], i32** [[_TMP1]], align 8, !dbg [[DBG135]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[TMP]], align 8, !dbg [[DBG136:![0-9]+]] +// CHECK5-NEXT: store i32* [[TMP9]], i32** [[_TMP2]], align 8, !dbg [[DBG137:![0-9]+]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP1]], align 8, !dbg [[DBG138:![0-9]+]] +// CHECK5-NEXT: store i32* [[TMP10]], i32** [[_TMP3]], align 8, !dbg [[DBG137]] +// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG137]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG137]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4, !dbg [[DBG137]] +// CHECK5-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB24:[0-9]+]], i32 [[TMP12]]), !dbg [[DBG137]] +// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0, !dbg [[DBG137]] +// CHECK5-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG137]] // CHECK5: omp_if.then: -// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0, !dbg [[DBG138:![0-9]+]] -// CHECK5-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP7]], align 8, !dbg [[DBG138]] -// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1, !dbg [[DBG138]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[_TMP4]], align 8, !dbg [[DBG139:![0-9]+]] -// CHECK5-NEXT: store i32* [[TMP9]], i32** [[TMP8]], align 8, !dbg [[DBG138]] -// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2, !dbg [[DBG138]] -// CHECK5-NEXT: store i32* [[CONV1]], i32** [[TMP10]], align 8, !dbg [[DBG138]] -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3, !dbg [[DBG138]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32*, i32** [[_TMP5]], align 8, !dbg [[DBG139]] -// CHECK5-NEXT: store i32* [[TMP12]], i32** [[TMP11]], align 8, !dbg [[DBG138]] -// CHECK5-NEXT: invoke void @_ZZN2SSC1ERiENKUlvE_clEv(%class.anon.1* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) -// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG138]] +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 0, !dbg [[DBG139:![0-9]+]] +// CHECK5-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP15]], align 8, !dbg [[DBG139]] +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 1, !dbg [[DBG139]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32*, i32** [[_TMP2]], align 8, !dbg [[DBG140:![0-9]+]] +// CHECK5-NEXT: store i32* [[TMP17]], i32** [[TMP16]], align 8, !dbg [[DBG139]] +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 2, !dbg [[DBG139]] +// CHECK5-NEXT: store i32* [[B]], i32** [[TMP18]], align 8, !dbg [[DBG139]] +// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 3, !dbg [[DBG139]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32*, i32** [[_TMP3]], align 8, !dbg [[DBG140]] +// CHECK5-NEXT: store i32* [[TMP20]], i32** [[TMP19]], align 8, !dbg [[DBG139]] +// CHECK5-NEXT: invoke void @_ZZN2SSC1ERiENKUlvE_clEv(%class.anon.3* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) +// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG139]] // CHECK5: invoke.cont: -// CHECK5-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB24]], i32 [[TMP4]]), !dbg [[DBG138]] -// CHECK5-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG138]] -// CHECK5-NEXT: br label [[OMP_IF_END]], !dbg [[DBG138]] +// CHECK5-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB24]], i32 [[TMP12]]), !dbg [[DBG139]] +// CHECK5-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG139]] +// CHECK5-NEXT: br label [[OMP_IF_END]], !dbg [[DBG139]] // CHECK5: omp_if.end: -// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0, !dbg [[DBG138]] -// CHECK5-NEXT: [[TMP14:%.*]] = load i32*, i32** [[_TMP4]], align 8, !dbg [[DBG140:![0-9]+]] -// CHECK5-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to i8*, !dbg [[DBG138]] -// CHECK5-NEXT: store i8* [[TMP15]], i8** [[TMP13]], align 8, !dbg [[DBG138]] -// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1, !dbg [[DBG138]] -// CHECK5-NEXT: [[TMP17:%.*]] = bitcast i32* [[CONV1]] to i8*, !dbg [[DBG138]] -// CHECK5-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8, !dbg [[DBG138]] -// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2, !dbg [[DBG138]] -// CHECK5-NEXT: [[TMP19:%.*]] = load i32*, i32** [[_TMP5]], align 8, !dbg [[DBG141:![0-9]+]] -// CHECK5-NEXT: [[TMP20:%.*]] = bitcast i32* [[TMP19]] to i8*, !dbg [[DBG138]] -// CHECK5-NEXT: store i8* [[TMP20]], i8** [[TMP18]], align 8, !dbg [[DBG138]] -// CHECK5-NEXT: [[TMP21:%.*]] = bitcast [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*, !dbg [[DBG138]] -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG138]] -// CHECK5-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB24]], i32 [[TMP4]], i64 24, i8* [[TMP21]], void (i8*, i8*)* @.omp.copyprivate.copy_func.9, i32 [[TMP22]]), !dbg [[DBG138]] -// CHECK5-NEXT: ret void, !dbg [[DBG142:![0-9]+]] +// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0, !dbg [[DBG139]] +// CHECK5-NEXT: [[TMP22:%.*]] = load i32*, i32** [[_TMP2]], align 8, !dbg [[DBG141:![0-9]+]] +// CHECK5-NEXT: [[TMP23:%.*]] = bitcast i32* [[TMP22]] to i8*, !dbg [[DBG139]] +// CHECK5-NEXT: store i8* [[TMP23]], i8** [[TMP21]], align 8, !dbg [[DBG139]] +// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1, !dbg [[DBG139]] +// CHECK5-NEXT: [[TMP25:%.*]] = bitcast i32* [[B]] to i8*, !dbg [[DBG139]] +// CHECK5-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8, !dbg [[DBG139]] +// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2, !dbg [[DBG139]] +// CHECK5-NEXT: [[TMP27:%.*]] = load i32*, i32** [[_TMP3]], align 8, !dbg [[DBG142:![0-9]+]] +// CHECK5-NEXT: [[TMP28:%.*]] = bitcast i32* [[TMP27]] to i8*, !dbg [[DBG139]] +// CHECK5-NEXT: store i8* [[TMP28]], i8** [[TMP26]], align 8, !dbg [[DBG139]] +// CHECK5-NEXT: [[TMP29:%.*]] = bitcast [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*, !dbg [[DBG139]] +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG139]] +// CHECK5-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB24]], i32 [[TMP12]], i64 24, i8* [[TMP29]], void (i8*, i8*)* @.omp.copyprivate.copy_func.9, i32 [[TMP30]]), !dbg [[DBG139]] +// CHECK5-NEXT: ret void, !dbg [[DBG143:![0-9]+]] // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP23:%.*]] = landingpad { i8*, i32 } -// CHECK5-NEXT: catch i8* null, !dbg [[DBG138]] -// CHECK5-NEXT: [[TMP24:%.*]] = extractvalue { i8*, i32 } [[TMP23]], 0, !dbg [[DBG138]] -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP24]]) #[[ATTR13]], !dbg [[DBG138]] -// CHECK5-NEXT: unreachable, !dbg [[DBG138]] +// CHECK5-NEXT: [[TMP31:%.*]] = landingpad { i8*, i32 } +// CHECK5-NEXT: catch i8* null, !dbg [[DBG139]] +// CHECK5-NEXT: [[TMP32:%.*]] = extractvalue { i8*, i32 } [[TMP31]], 0, !dbg [[DBG139]] +// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP32]]) #[[ATTR13]], !dbg [[DBG139]] +// CHECK5-NEXT: unreachable, !dbg [[DBG139]] // // // CHECK5-LABEL: define {{[^@]+}}@_ZZN2SSC1ERiENKUlvE_clEv -// CHECK5-SAME: (%class.anon.1* noundef nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR10]] align 2 !dbg [[DBG143:![0-9]+]] { +// CHECK5-SAME: (%class.anon.3* noundef nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR10]] align 2 !dbg [[DBG144:![0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon.1*, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: store %class.anon.1* [[THIS]], %class.anon.1** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[THIS1:%.*]] = load %class.anon.1*, %class.anon.1** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1:%.*]], %class.anon.1* [[THIS1]], i32 0, i32 0 +// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %class.anon.3*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 +// CHECK5-NEXT: store %class.anon.3* [[THIS]], %class.anon.3** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[THIS1:%.*]] = load %class.anon.3*, %class.anon.3** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_3:%.*]], %class.anon.3* [[THIS1]], i32 0, i32 0 // CHECK5-NEXT: [[TMP1:%.*]] = load %struct.SS*, %struct.SS** [[TMP0]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 1, !dbg [[DBG144:![0-9]+]] -// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8, !dbg [[DBG144]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4, !dbg [[DBG145:![0-9]+]] -// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1, !dbg [[DBG145]] -// CHECK5-NEXT: store i32 [[INC]], i32* [[TMP3]], align 4, !dbg [[DBG145]] -// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 2, !dbg [[DBG146:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8, !dbg [[DBG146]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4, !dbg [[DBG147:![0-9]+]] -// CHECK5-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP7]], -1, !dbg [[DBG147]] -// CHECK5-NEXT: store i32 [[DEC]], i32* [[TMP6]], align 4, !dbg [[DBG147]] -// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 3, !dbg [[DBG148:![0-9]+]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[TMP8]], align 8, !dbg [[DBG148]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4, !dbg [[DBG149:![0-9]+]] -// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1, !dbg [[DBG149]] -// CHECK5-NEXT: store i32 [[DIV]], i32* [[TMP9]], align 4, !dbg [[DBG149]] -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 1, !dbg [[DBG150:![0-9]+]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP11]], align 8, !dbg [[DBG150]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4, !dbg [[DBG151:![0-9]+]] -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*, !dbg [[DBG151]] -// CHECK5-NEXT: store i32 [[TMP13]], i32* [[CONV]], align 4, !dbg [[DBG151]] -// CHECK5-NEXT: [[TMP14:%.*]] = load i64, i64* [[A_CASTED]], align 8, !dbg [[DBG151]] -// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 2, !dbg [[DBG152:![0-9]+]] -// CHECK5-NEXT: [[TMP16:%.*]] = load i32*, i32** [[TMP15]], align 8, !dbg [[DBG152]] -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4, !dbg [[DBG151]] -// CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32*, !dbg [[DBG151]] -// CHECK5-NEXT: store i32 [[TMP17]], i32* [[CONV2]], align 4, !dbg [[DBG151]] -// CHECK5-NEXT: [[TMP18:%.*]] = load i64, i64* [[B_CASTED]], align 8, !dbg [[DBG151]] -// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[THIS1]], i32 0, i32 3, !dbg [[DBG153:![0-9]+]] -// CHECK5-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP19]], align 8, !dbg [[DBG153]] -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4, !dbg [[DBG151]] -// CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[C_CASTED]] to i32*, !dbg [[DBG151]] -// CHECK5-NEXT: store i32 [[TMP21]], i32* [[CONV3]], align 4, !dbg [[DBG151]] -// CHECK5-NEXT: [[TMP22:%.*]] = load i64, i64* [[C_CASTED]], align 8, !dbg [[DBG151]] -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB30:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i64, i64, i64)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.SS* [[TMP1]], i64 [[TMP14]], i64 [[TMP18]], i64 [[TMP22]]), !dbg [[DBG151]] -// CHECK5-NEXT: ret void, !dbg [[DBG154:![0-9]+]] +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[THIS1]], i32 0, i32 1, !dbg [[DBG145:![0-9]+]] +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8, !dbg [[DBG145]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4, !dbg [[DBG146:![0-9]+]] +// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1, !dbg [[DBG146]] +// CHECK5-NEXT: store i32 [[INC]], i32* [[TMP3]], align 4, !dbg [[DBG146]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[THIS1]], i32 0, i32 2, !dbg [[DBG147:![0-9]+]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8, !dbg [[DBG147]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4, !dbg [[DBG148:![0-9]+]] +// CHECK5-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP7]], -1, !dbg [[DBG148]] +// CHECK5-NEXT: store i32 [[DEC]], i32* [[TMP6]], align 4, !dbg [[DBG148]] +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[THIS1]], i32 0, i32 3, !dbg [[DBG149:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[TMP8]], align 8, !dbg [[DBG149]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4, !dbg [[DBG150:![0-9]+]] +// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1, !dbg [[DBG150]] +// CHECK5-NEXT: store i32 [[DIV]], i32* [[TMP9]], align 4, !dbg [[DBG150]] +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG151:![0-9]+]] +// CHECK5-NEXT: store %struct.SS* [[TMP1]], %struct.SS** [[TMP11]], align 8, !dbg [[DBG151]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG151]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[THIS1]], i32 0, i32 1, !dbg [[DBG152:![0-9]+]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32*, i32** [[TMP13]], align 8, !dbg [[DBG152]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4, !dbg [[DBG152]] +// CHECK5-NEXT: store i32 [[TMP15]], i32* [[TMP12]], align 8, !dbg [[DBG151]] +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG151]] +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[THIS1]], i32 0, i32 2, !dbg [[DBG153:![0-9]+]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8, !dbg [[DBG153]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4, !dbg [[DBG153]] +// CHECK5-NEXT: store i32 [[TMP19]], i32* [[TMP16]], align 4, !dbg [[DBG151]] +// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG151]] +// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[THIS1]], i32 0, i32 3, !dbg [[DBG154:![0-9]+]] +// CHECK5-NEXT: [[TMP22:%.*]] = load i32*, i32** [[TMP21]], align 8, !dbg [[DBG154]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4, !dbg [[DBG154]] +// CHECK5-NEXT: store i32 [[TMP23]], i32* [[TMP20]], align 8, !dbg [[DBG151]] +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB30:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG151]] +// CHECK5-NEXT: ret void, !dbg [[DBG155:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func.9 -// CHECK5-SAME: (i8* noundef [[TMP0:%.*]], i8* noundef [[TMP1:%.*]]) #[[ATTR9]] !dbg [[DBG155:![0-9]+]] { +// CHECK5-SAME: (i8* noundef [[TMP0:%.*]], i8* noundef [[TMP1:%.*]]) #[[ATTR9]] !dbg [[DBG156:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8 // CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK5-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG156:![0-9]+]] -// CHECK5-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [3 x i8*]*, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [3 x i8*]*, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 0, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 0, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4, !dbg [[DBG157:![0-9]+]] -// CHECK5-NEXT: store i32 [[TMP12]], i32* [[TMP8]], align 4, !dbg [[DBG157]] -// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 1, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 8, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i32*, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 1, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 8, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i32*, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4, !dbg [[DBG158:![0-9]+]] -// CHECK5-NEXT: store i32 [[TMP19]], i32* [[TMP15]], align 4, !dbg [[DBG158]] -// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 2, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP21]] to i32*, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 2, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP24:%.*]] = load i8*, i8** [[TMP23]], align 8, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP24]] to i32*, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4, !dbg [[DBG159:![0-9]+]] -// CHECK5-NEXT: store i32 [[TMP26]], i32* [[TMP22]], align 4, !dbg [[DBG159]] -// CHECK5-NEXT: ret void, !dbg [[DBG159]] +// CHECK5-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG157:![0-9]+]] +// CHECK5-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [3 x i8*]*, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [3 x i8*]*, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 0, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 0, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4, !dbg [[DBG158:![0-9]+]] +// CHECK5-NEXT: store i32 [[TMP12]], i32* [[TMP8]], align 4, !dbg [[DBG158]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 1, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 8, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i32*, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 1, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 8, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i32*, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4, !dbg [[DBG159:![0-9]+]] +// CHECK5-NEXT: store i32 [[TMP19]], i32* [[TMP15]], align 4, !dbg [[DBG159]] +// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 2, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP21]] to i32*, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 2, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP24:%.*]] = load i8*, i8** [[TMP23]], align 8, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP24]] to i32*, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4, !dbg [[DBG160:![0-9]+]] +// CHECK5-NEXT: store i32 [[TMP26]], i32* [[TMP22]], align 4, !dbg [[DBG160]] +// CHECK5-NEXT: ret void, !dbg [[DBG160]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[C:%.*]]) #[[ATTR12]] !dbg [[DBG160:![0-9]+]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] !dbg [[DBG161:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[_TMP3:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[_TMP4:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[_TMP5:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [3 x i8*], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[C]], i64* [[C_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8, !dbg [[DBG161:![0-9]+]] -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*, !dbg [[DBG161]] -// CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32*, !dbg [[DBG161]] -// CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[C_ADDR]] to i32*, !dbg [[DBG161]] -// CHECK5-NEXT: store i32* [[CONV]], i32** [[TMP]], align 8, !dbg [[DBG161]] -// CHECK5-NEXT: store i32* [[CONV2]], i32** [[_TMP3]], align 8, !dbg [[DBG161]] -// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP]], align 8, !dbg [[DBG162:![0-9]+]] -// CHECK5-NEXT: store i32* [[TMP1]], i32** [[_TMP4]], align 8, !dbg [[DBG163:![0-9]+]] -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[_TMP3]], align 8, !dbg [[DBG164:![0-9]+]] -// CHECK5-NEXT: store i32* [[TMP2]], i32** [[_TMP5]], align 8, !dbg [[DBG163]] -// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG163]] -// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG163]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4, !dbg [[DBG163]] -// CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB28:[0-9]+]], i32 [[TMP4]]), !dbg [[DBG163]] -// CHECK5-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0, !dbg [[DBG163]] -// CHECK5-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG163]] +// CHECK5-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8, !dbg [[DBG162:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0, !dbg [[DBG162]] +// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8, !dbg [[DBG162]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1, !dbg [[DBG162]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8, !dbg [[DBG162]] +// CHECK5-NEXT: store i32 [[TMP4]], i32* [[A]], align 4, !dbg [[DBG162]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2, !dbg [[DBG162]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4, !dbg [[DBG162]] +// CHECK5-NEXT: store i32 [[TMP6]], i32* [[B]], align 4, !dbg [[DBG162]] +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3, !dbg [[DBG162]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8, !dbg [[DBG162]] +// CHECK5-NEXT: store i32 [[TMP8]], i32* [[C]], align 4, !dbg [[DBG162]] +// CHECK5-NEXT: store i32* [[A]], i32** [[TMP]], align 8, !dbg [[DBG162]] +// CHECK5-NEXT: store i32* [[C]], i32** [[_TMP1]], align 8, !dbg [[DBG162]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[TMP]], align 8, !dbg [[DBG163:![0-9]+]] +// CHECK5-NEXT: store i32* [[TMP9]], i32** [[_TMP2]], align 8, !dbg [[DBG164:![0-9]+]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP1]], align 8, !dbg [[DBG165:![0-9]+]] +// CHECK5-NEXT: store i32* [[TMP10]], i32** [[_TMP3]], align 8, !dbg [[DBG164]] +// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG164]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG164]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4, !dbg [[DBG164]] +// CHECK5-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB28:[0-9]+]], i32 [[TMP12]]), !dbg [[DBG164]] +// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0, !dbg [[DBG164]] +// CHECK5-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG164]] // CHECK5: omp_if.then: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[_TMP4]], align 8, !dbg [[DBG162]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4, !dbg [[DBG165:![0-9]+]] -// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1, !dbg [[DBG165]] -// CHECK5-NEXT: store i32 [[INC]], i32* [[TMP7]], align 4, !dbg [[DBG165]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 4, !dbg [[DBG166:![0-9]+]] -// CHECK5-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP9]], -1, !dbg [[DBG166]] -// CHECK5-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 4, !dbg [[DBG166]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP5]], align 8, !dbg [[DBG164]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4, !dbg [[DBG167:![0-9]+]] -// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 1, !dbg [[DBG167]] -// CHECK5-NEXT: store i32 [[DIV]], i32* [[TMP10]], align 4, !dbg [[DBG167]] -// CHECK5-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB28]], i32 [[TMP4]]), !dbg [[DBG165]] -// CHECK5-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG165]] -// CHECK5-NEXT: br label [[OMP_IF_END]], !dbg [[DBG165]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32*, i32** [[_TMP2]], align 8, !dbg [[DBG163]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4, !dbg [[DBG166:![0-9]+]] +// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1, !dbg [[DBG166]] +// CHECK5-NEXT: store i32 [[INC]], i32* [[TMP15]], align 4, !dbg [[DBG166]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[B]], align 4, !dbg [[DBG167:![0-9]+]] +// CHECK5-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP17]], -1, !dbg [[DBG167]] +// CHECK5-NEXT: store i32 [[DEC]], i32* [[B]], align 4, !dbg [[DBG167]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32*, i32** [[_TMP3]], align 8, !dbg [[DBG165]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4, !dbg [[DBG168:![0-9]+]] +// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP19]], 1, !dbg [[DBG168]] +// CHECK5-NEXT: store i32 [[DIV]], i32* [[TMP18]], align 4, !dbg [[DBG168]] +// CHECK5-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB28]], i32 [[TMP12]]), !dbg [[DBG166]] +// CHECK5-NEXT: store i32 1, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG166]] +// CHECK5-NEXT: br label [[OMP_IF_END]], !dbg [[DBG166]] // CHECK5: omp_if.end: -// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0, !dbg [[DBG165]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP4]], align 8, !dbg [[DBG168:![0-9]+]] -// CHECK5-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8*, !dbg [[DBG165]] -// CHECK5-NEXT: store i8* [[TMP14]], i8** [[TMP12]], align 8, !dbg [[DBG165]] -// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1, !dbg [[DBG165]] -// CHECK5-NEXT: [[TMP16:%.*]] = bitcast i32* [[CONV1]] to i8*, !dbg [[DBG165]] -// CHECK5-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8, !dbg [[DBG165]] -// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2, !dbg [[DBG165]] -// CHECK5-NEXT: [[TMP18:%.*]] = load i32*, i32** [[_TMP5]], align 8, !dbg [[DBG169:![0-9]+]] -// CHECK5-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to i8*, !dbg [[DBG165]] -// CHECK5-NEXT: store i8* [[TMP19]], i8** [[TMP17]], align 8, !dbg [[DBG165]] -// CHECK5-NEXT: [[TMP20:%.*]] = bitcast [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*, !dbg [[DBG165]] -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG165]] -// CHECK5-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB28]], i32 [[TMP4]], i64 24, i8* [[TMP20]], void (i8*, i8*)* @.omp.copyprivate.copy_func.11, i32 [[TMP21]]), !dbg [[DBG165]] -// CHECK5-NEXT: ret void, !dbg [[DBG170:![0-9]+]] +// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0, !dbg [[DBG166]] +// CHECK5-NEXT: [[TMP21:%.*]] = load i32*, i32** [[_TMP2]], align 8, !dbg [[DBG169:![0-9]+]] +// CHECK5-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to i8*, !dbg [[DBG166]] +// CHECK5-NEXT: store i8* [[TMP22]], i8** [[TMP20]], align 8, !dbg [[DBG166]] +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1, !dbg [[DBG166]] +// CHECK5-NEXT: [[TMP24:%.*]] = bitcast i32* [[B]] to i8*, !dbg [[DBG166]] +// CHECK5-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8, !dbg [[DBG166]] +// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2, !dbg [[DBG166]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i32*, i32** [[_TMP3]], align 8, !dbg [[DBG170:![0-9]+]] +// CHECK5-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to i8*, !dbg [[DBG166]] +// CHECK5-NEXT: store i8* [[TMP27]], i8** [[TMP25]], align 8, !dbg [[DBG166]] +// CHECK5-NEXT: [[TMP28:%.*]] = bitcast [3 x i8*]* [[DOTOMP_COPYPRIVATE_CPR_LIST]] to i8*, !dbg [[DBG166]] +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG166]] +// CHECK5-NEXT: call void @__kmpc_copyprivate(%struct.ident_t* @[[GLOB28]], i32 [[TMP12]], i64 24, i8* [[TMP28]], void (i8*, i8*)* @.omp.copyprivate.copy_func.11, i32 [[TMP29]]), !dbg [[DBG166]] +// CHECK5-NEXT: ret void, !dbg [[DBG171:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func.11 -// CHECK5-SAME: (i8* noundef [[TMP0:%.*]], i8* noundef [[TMP1:%.*]]) #[[ATTR9]] !dbg [[DBG171:![0-9]+]] { +// CHECK5-SAME: (i8* noundef [[TMP0:%.*]], i8* noundef [[TMP1:%.*]]) #[[ATTR9]] !dbg [[DBG172:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8 // CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK5-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG172:![0-9]+]] -// CHECK5-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [3 x i8*]*, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [3 x i8*]*, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 0, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 0, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4, !dbg [[DBG173:![0-9]+]] -// CHECK5-NEXT: store i32 [[TMP12]], i32* [[TMP8]], align 4, !dbg [[DBG173]] -// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 1, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 8, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i32*, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 1, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 8, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i32*, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4, !dbg [[DBG174:![0-9]+]] -// CHECK5-NEXT: store i32 [[TMP19]], i32* [[TMP15]], align 4, !dbg [[DBG174]] -// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 2, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP21]] to i32*, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 2, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP24:%.*]] = load i8*, i8** [[TMP23]], align 8, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP24]] to i32*, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4, !dbg [[DBG175:![0-9]+]] -// CHECK5-NEXT: store i32 [[TMP26]], i32* [[TMP22]], align 4, !dbg [[DBG175]] -// CHECK5-NEXT: ret void, !dbg [[DBG175]] +// CHECK5-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG173:![0-9]+]] +// CHECK5-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [3 x i8*]*, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [3 x i8*]*, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 0, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 0, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4, !dbg [[DBG174:![0-9]+]] +// CHECK5-NEXT: store i32 [[TMP12]], i32* [[TMP8]], align 4, !dbg [[DBG174]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 1, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 8, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i32*, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 1, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 8, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i32*, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4, !dbg [[DBG175:![0-9]+]] +// CHECK5-NEXT: store i32 [[TMP19]], i32* [[TMP15]], align 4, !dbg [[DBG175]] +// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP3]], i64 0, i64 2, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP21]] to i32*, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP5]], i64 0, i64 2, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP24:%.*]] = load i8*, i8** [[TMP23]], align 8, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP24]] to i32*, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4, !dbg [[DBG176:![0-9]+]] +// CHECK5-NEXT: store i32 [[TMP26]], i32* [[TMP22]], align 4, !dbg [[DBG176]] +// CHECK5-NEXT: ret void, !dbg [[DBG176]] // // // CHECK5-LABEL: define {{[^@]+}}@_Z15parallel_singlev -// CHECK5-SAME: () #[[ATTR10]] !dbg [[DBG176:![0-9]+]] { +// CHECK5-SAME: () #[[ATTR10]] !dbg [[DBG177:![0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB35:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..12 to void (i32*, i32*, ...)*)), !dbg [[DBG177:![0-9]+]] -// CHECK5-NEXT: ret void, !dbg [[DBG178:![0-9]+]] +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 1 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB35:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG178:![0-9]+]] +// CHECK5-NEXT: ret void, !dbg [[DBG179:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR12]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG179:![0-9]+]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG180:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG180:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4, !dbg [[DBG180]] -// CHECK5-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB32:[0-9]+]], i32 [[TMP1]]), !dbg [[DBG180]] -// CHECK5-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0, !dbg [[DBG180]] -// CHECK5-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG180]] +// CHECK5-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8, !dbg [[DBG181:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG182:![0-9]+]] +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4, !dbg [[DBG182]] +// CHECK5-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB32:[0-9]+]], i32 [[TMP2]]), !dbg [[DBG182]] +// CHECK5-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0, !dbg [[DBG182]] +// CHECK5-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG182]] // CHECK5: omp_if.then: // CHECK5-NEXT: invoke void @_Z3foov() -// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG181:![0-9]+]] +// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG183:![0-9]+]] // CHECK5: invoke.cont: -// CHECK5-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB32]], i32 [[TMP1]]), !dbg [[DBG181]] -// CHECK5-NEXT: br label [[OMP_IF_END]], !dbg [[DBG181]] +// CHECK5-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB32]], i32 [[TMP2]]), !dbg [[DBG183]] +// CHECK5-NEXT: br label [[OMP_IF_END]], !dbg [[DBG183]] // CHECK5: omp_if.end: -// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB33:[0-9]+]], i32 [[TMP1]]), !dbg [[DBG182:![0-9]+]] -// CHECK5-NEXT: ret void, !dbg [[DBG182]] +// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB33:[0-9]+]], i32 [[TMP2]]), !dbg [[DBG184:![0-9]+]] +// CHECK5-NEXT: ret void, !dbg [[DBG184]] // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP4:%.*]] = landingpad { i8*, i32 } -// CHECK5-NEXT: catch i8* null, !dbg [[DBG181]] -// CHECK5-NEXT: [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0, !dbg [[DBG181]] -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP5]]) #[[ATTR13]], !dbg [[DBG181]] -// CHECK5-NEXT: unreachable, !dbg [[DBG181]] +// CHECK5-NEXT: [[TMP5:%.*]] = landingpad { i8*, i32 } +// CHECK5-NEXT: catch i8* null, !dbg [[DBG183]] +// CHECK5-NEXT: [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 0, !dbg [[DBG183]] +// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP6]]) #[[ATTR13]], !dbg [[DBG183]] +// CHECK5-NEXT: unreachable, !dbg [[DBG183]] // // // CHECK5-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_single_codegen.cpp -// CHECK5-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG183:![0-9]+]] { +// CHECK5-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG185:![0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void @__cxx_global_var_init(), !dbg [[DBG184:![0-9]+]] -// CHECK5-NEXT: call void @__cxx_global_var_init.4(), !dbg [[DBG184]] -// CHECK5-NEXT: call void @.__omp_threadprivate_init_.(), !dbg [[DBG184]] -// CHECK5-NEXT: call void @.__omp_threadprivate_init_..3(), !dbg [[DBG184]] +// CHECK5-NEXT: call void @__cxx_global_var_init(), !dbg [[DBG186:![0-9]+]] +// CHECK5-NEXT: call void @__cxx_global_var_init.4(), !dbg [[DBG186]] +// CHECK5-NEXT: call void @.__omp_threadprivate_init_.(), !dbg [[DBG186]] +// CHECK5-NEXT: call void @.__omp_threadprivate_init_..3(), !dbg [[DBG186]] // CHECK5-NEXT: ret void // // diff --git a/clang/test/OpenMP/single_firstprivate_codegen.cpp b/clang/test/OpenMP/single_firstprivate_codegen.cpp --- a/clang/test/OpenMP/single_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/single_firstprivate_codegen.cpp @@ -355,6 +355,7 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* @@ -364,22 +365,30 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 2) // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* nonnull align 4 dereferenceable(4) [[VAR]], i32 3) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[T_VAR]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[VAR]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[TMP4]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP2]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP6]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2StC2Ev @@ -448,82 +457,81 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR7:[0-9]+]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR7:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 -// CHECK1-NEXT: br i1 [[TMP7]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK1-NEXT: br i1 [[TMP12]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP9]], i8* align 4 [[TMP10]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [2 x %struct.S.0]* [[TMP2]] to %struct.S.0* -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP12]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast [2 x i32]* [[TMP4]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast [2 x %struct.S.0]* [[TMP6]] to %struct.S.0* +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP17]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP11]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP16]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S.0* nonnull align 4 dereferenceable(4) [[TMP3]], %struct.St* [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP13]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[ARRAYIDX7]] to i8* -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[VAR5]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i64 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN8]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.0* nonnull align 4 dereferenceable(4) [[TMP8]], %struct.St* [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 0 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP19:%.*]] = bitcast %struct.S.0* [[ARRAYIDX3]] to i8* +// CHECK1-NEXT: [[TMP20:%.*]] = bitcast %struct.S.0* [[VAR]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i64 4, i1 false) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN4]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP16]], [[OMP_ARRAYCPY_DONE4]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP21]], [[OMP_ARRAYCPY_DONE1]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done9: -// CHECK1-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done5: +// CHECK1-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -741,39 +749,41 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK3-NEXT: [[G:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK3-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK3-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP5:%.*]] = load volatile i32, i32* @g, align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[G]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[SIVAR1]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load volatile i32, i32* @g, align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[G]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[SIVAR]], align 4 // CHECK3-NEXT: store i32 1, i32* [[G]], align 4 -// CHECK3-NEXT: store i32 17, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store i32* [[G]], i32** [[TMP7]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store i32* [[SIVAR1]], i32** [[TMP8]], align 8 +// CHECK3-NEXT: store i32 17, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[G]], i32** [[TMP9]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[TMP10]], align 8 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(16) [[REF_TMP]]) -// CHECK3-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -933,38 +943,43 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>*, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK4-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* // CHECK4-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>** [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* @_ZZ4mainE5sivar) +// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store i32* @_ZZ4mainE5sivar, i32** [[TMP0]], align 8 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK4-NEXT: [[G:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>, align 8 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK4-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK4-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK4-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK4-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK4-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: [[TMP5:%.*]] = load volatile i32, i32* @g, align 4 -// CHECK4-NEXT: store i32 [[TMP5]], i32* [[G]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK4-NEXT: store i32 [[TMP6]], i32* [[SIVAR1]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load volatile i32, i32* @g, align 4 +// CHECK4-NEXT: store i32 [[TMP7]], i32* [[G]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK4-NEXT: store i32 [[TMP8]], i32* [[SIVAR]], align 4 // CHECK4-NEXT: store i32 1, i32* [[G]], align 4 -// CHECK4-NEXT: store i32 37, i32* [[SIVAR1]], align 4 +// CHECK4-NEXT: store i32 37, i32* [[SIVAR]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>* [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** [[BLOCK_ISA]], align 8 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>* [[BLOCK]], i32 0, i32 1 @@ -976,22 +991,22 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>* [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>* [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP7:%.*]] = load volatile i32, i32* [[G]], align 4 -// CHECK4-NEXT: store volatile i32 [[TMP7]], i32* [[BLOCK_CAPTURED]], align 8 -// CHECK4-NEXT: [[BLOCK_CAPTURED2:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>* [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK4-NEXT: store i32 [[TMP8]], i32* [[BLOCK_CAPTURED2]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>* [[BLOCK]] to void ()* -// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP9]] to %struct.__block_literal_generic* -// CHECK4-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP11:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* -// CHECK4-NEXT: [[TMP12:%.*]] = load i8*, i8** [[TMP10]], align 8 -// CHECK4-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to void (i8*)* -// CHECK4-NEXT: call void [[TMP13]](i8* [[TMP11]]) -// CHECK4-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: [[TMP9:%.*]] = load volatile i32, i32* [[G]], align 4 +// CHECK4-NEXT: store volatile i32 [[TMP9]], i32* [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>* [[BLOCK]], i32 0, i32 6 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP10]], i32* [[BLOCK_CAPTURED1]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, i32 }>* [[BLOCK]] to void ()* +// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP11]] to %struct.__block_literal_generic* +// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP13:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* +// CHECK4-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP12]], align 8 +// CHECK4-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to void (i8*)* +// CHECK4-NEXT: call void [[TMP15]](i8* [[TMP13]]) +// CHECK4-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]]) // CHECK4-NEXT: ret void // // diff --git a/clang/test/OpenMP/single_private_codegen.cpp b/clang/test/OpenMP/single_private_codegen.cpp --- a/clang/test/OpenMP/single_private_codegen.cpp +++ b/clang/test/OpenMP/single_private_codegen.cpp @@ -100,6 +100,7 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 @@ -110,7 +111,7 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00) // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], float noundef 3.000000e+00) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK1-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]] @@ -153,10 +154,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 @@ -164,11 +166,13 @@ // CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 @@ -181,29 +185,29 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR]], align 4 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast %struct.S* [[ARRAYIDX1]] to i8* -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP6:%.*]] = bitcast %struct.S* [[ARRAYIDX1]] to i8* +// CHECK1-NEXT: [[TMP7:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP6]], i8* align 4 [[TMP7]], i64 4, i1 false) // CHECK1-NEXT: store i32 303, i32* [[SIVAR]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i64 2 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP7]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP8]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done3: -// CHECK1-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // @@ -226,6 +230,7 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* @@ -235,7 +240,7 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 @@ -311,21 +316,24 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 // CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 @@ -338,28 +346,28 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR]], align 4 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast %struct.S.0* [[ARRAYIDX1]] to i8* -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast %struct.S.0* [[VAR]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP6:%.*]] = bitcast %struct.S.0* [[ARRAYIDX1]] to i8* +// CHECK1-NEXT: [[TMP7:%.*]] = bitcast %struct.S.0* [[VAR]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP6]], i8* align 4 [[TMP7]], i64 4, i1 false) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN2]], i64 2 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN2]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP7]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP8]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done3: -// CHECK1-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // @@ -418,32 +426,35 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK3-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK3-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK3: omp_if.then: // CHECK3-NEXT: store double 1.000000e+00, double* [[G]], align 8 // CHECK3-NEXT: store i32 101, i32* [[SIVAR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double* [[G]], double** [[TMP4]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G]], double** [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[TMP6]], align 8 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) -// CHECK3-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK3-NEXT: ret void // // @@ -463,28 +474,32 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>*, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* // CHECK4-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>** [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK4-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK4-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, align 8 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK4-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK4-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK4-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK4: omp_if.then: // CHECK4-NEXT: store double 1.000000e+00, double* [[G]], align 8 // CHECK4-NEXT: store i32 101, i32* [[SIVAR]], align 4 @@ -499,22 +514,22 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp.1 to %struct.__block_descriptor*), %struct.__block_descriptor** [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP4:%.*]] = load volatile double, double* [[G]], align 8 -// CHECK4-NEXT: store volatile double [[TMP4]], double* [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load volatile double, double* [[G]], align 8 +// CHECK4-NEXT: store volatile double [[TMP5]], double* [[BLOCK_CAPTURED]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[SIVAR]], align 4 -// CHECK4-NEXT: store i32 [[TMP5]], i32* [[BLOCK_CAPTURED1]], align 8 -// CHECK4-NEXT: [[TMP6:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK]] to void ()* -// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP6]] to %struct.__block_literal_generic* -// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP8:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* -// CHECK4-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP7]], align 8 -// CHECK4-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to void (i8*)* -// CHECK4-NEXT: call void [[TMP10]](i8* noundef [[TMP8]]) -// CHECK4-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], i32* [[BLOCK_CAPTURED1]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK]] to void ()* +// CHECK4-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP7]] to %struct.__block_literal_generic* +// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP9:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* +// CHECK4-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP8]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to void (i8*)* +// CHECK4-NEXT: call void [[TMP11]](i8* noundef [[TMP9]]) +// CHECK4-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK4-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_codegen_global_capture.cpp b/clang/test/OpenMP/target_codegen_global_capture.cpp --- a/clang/test/OpenMP/target_codegen_global_capture.cpp +++ b/clang/test/OpenMP/target_codegen_global_capture.cpp @@ -363,6 +363,7 @@ // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[GD_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SD_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -397,27 +398,33 @@ // CHECK1-NEXT: [[ADD13:%.*]] = fadd double [[CONV12]], 1.000000e+00 // CHECK1-NEXT: [[CONV14:%.*]] = fptrunc double [[ADD13]] to float // CHECK1-NEXT: store float [[CONV14]], float* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load double, double* [[CONV3]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP4]], 0.000000e+00 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i16* [[CONV6]], i16** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store double* [[CONV7]], double** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store float* [[CONV8]], float** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load double, double* [[CONV3]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP7]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: land.lhs.true: -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV4]], align 2 -// CHECK1-NEXT: [[CONV15:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV4]], align 2 +// CHECK1-NEXT: [[CONV15:%.*]] = sext i16 [[TMP8]] to i32 // CHECK1-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0 // CHECK1-NEXT: br i1 [[CMP16]], label [[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]] // CHECK1: land.lhs.true17: -// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[CONV5]], align 4 -// CHECK1-NEXT: [[CONV18:%.*]] = fpext float [[TMP6]] to double +// CHECK1-NEXT: [[TMP9:%.*]] = load float, float* [[CONV5]], align 4 +// CHECK1-NEXT: [[CONV18:%.*]] = fpext float [[TMP9]] to double // CHECK1-NEXT: [[CMP19:%.*]] = fcmp ogt double [[CONV18]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP19]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i16*, double*, float*)* @.omp_outlined. to void (i32*, i32*, ...)*), i16* [[CONV6]], double* [[CONV7]], float* [[CONV8]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV6]], double* [[CONV7]], float* [[CONV8]]) #[[ATTR2]] +// CHECK1-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: @@ -425,34 +432,34 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[D:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[GD:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SD:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i16*, align 8 -// CHECK1-NEXT: [[GD_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SD_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i16* [[D]], i16** [[D_ADDR]], align 8 -// CHECK1-NEXT: store double* [[GD]], double** [[GD_ADDR]], align 8 -// CHECK1-NEXT: store float* [[SD]], float** [[SD_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16*, i16** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[GD_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load float*, float** [[SD_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP0]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16*, i16** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load float*, float** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i16, i16* [[TMP2]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP7]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV1]], i16* [[TMP0]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load double, double* [[TMP1]], align 8 -// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[TMP4]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD2]], double* [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load float, float* [[TMP2]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = fpext float [[TMP5]] to double +// CHECK1-NEXT: store i16 [[CONV1]], i16* [[TMP2]], align 2 +// CHECK1-NEXT: [[TMP8:%.*]] = load double, double* [[TMP4]], align 8 +// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[TMP8]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD2]], double* [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load float, float* [[TMP6]], align 4 +// CHECK1-NEXT: [[CONV3:%.*]] = fpext float [[TMP9]] to double // CHECK1-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 // CHECK1-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float -// CHECK1-NEXT: store float [[CONV5]], float* [[TMP2]], align 4 +// CHECK1-NEXT: store float [[CONV5]], float* [[TMP6]], align 4 // CHECK1-NEXT: ret void // // @@ -463,46 +470,52 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i16 [[A]], i16* [[A_ADDR]], align 2 // CHECK1-NEXT: store i16 [[B]], i16* [[B_ADDR]], align 2 // CHECK1-NEXT: store i16 [[C]], i16* [[C_ADDR]], align 2 // CHECK1-NEXT: store i16 [[D]], i16* [[D_ADDR]], align 2 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i16*, i16*, i16*, i16*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i16* [[A_ADDR]], i16* [[B_ADDR]], i16* [[C_ADDR]], i16* [[D_ADDR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[A_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[B_ADDR]], align 2 -// CHECK1-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i16* [[A_ADDR]], i16** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i16* [[B_ADDR]], i16** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i16* [[C_ADDR]], i16** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i16* [[D_ADDR]], i16** [[TMP3]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[A_ADDR]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[B_ADDR]], align 2 +// CHECK1-NEXT: [[CONV1:%.*]] = sext i16 [[TMP5]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV1]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[C_ADDR]], align 2 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, i16* [[C_ADDR]], align 2 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP6]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[CONV2]] -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[D_ADDR]], align 2 -// CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i16, i16* [[D_ADDR]], align 2 +// CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP7]] to i32 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CONV4]] -// CHECK1-NEXT: [[TMP4:%.*]] = load float, float* @_ZZ3barssssE2Sa, align 4 -// CHECK1-NEXT: [[CONV6:%.*]] = fptosi float [[TMP4]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load float, float* @_ZZ3barssssE2Sa, align 4 +// CHECK1-NEXT: [[CONV6:%.*]] = fptosi float [[TMP8]] to i32 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[ADD5]], [[CONV6]] -// CHECK1-NEXT: [[TMP5:%.*]] = load float, float* @_ZZ3barssssE2Sb, align 4 -// CHECK1-NEXT: [[CONV8:%.*]] = fptosi float [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP9:%.*]] = load float, float* @_ZZ3barssssE2Sb, align 4 +// CHECK1-NEXT: [[CONV8:%.*]] = fptosi float [[TMP9]] to i32 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[ADD7]], [[CONV8]] -// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* @_ZZ3barssssE2Sc, align 4 -// CHECK1-NEXT: [[CONV10:%.*]] = fptosi float [[TMP6]] to i32 +// CHECK1-NEXT: [[TMP10:%.*]] = load float, float* @_ZZ3barssssE2Sc, align 4 +// CHECK1-NEXT: [[CONV10:%.*]] = fptosi float [[TMP10]] to i32 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD9]], [[CONV10]] -// CHECK1-NEXT: [[TMP7:%.*]] = load float, float* @_ZZ3barssssE2Sd, align 4 -// CHECK1-NEXT: [[CONV12:%.*]] = fptosi float [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP11:%.*]] = load float, float* @_ZZ3barssssE2Sd, align 4 +// CHECK1-NEXT: [[CONV12:%.*]] = fptosi float [[TMP11]] to i32 // CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD11]], [[CONV12]] // CHECK1-NEXT: ret i32 [[ADD13]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[B:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i16*, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i16*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[GB_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SB_CASTED:%.*]] = alloca i64, align 8 @@ -517,148 +530,150 @@ // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [9 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i16* [[A]], i16** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 8 -// CHECK1-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8 -// CHECK1-NEXT: store i16* [[D]], i16** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16*, i16** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16*, i16** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16*, i16** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i16*, i16** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i16, i16* [[TMP4]], align 2 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP4]], i16* [[CONV]], align 2 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load double, double* @Gb, align 8 +// CHECK1-NEXT: store i16 [[TMP9]], i16* [[CONV]], align 2 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[B_CASTED]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load double, double* @Gb, align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[GB_CASTED]] to double* -// CHECK1-NEXT: store double [[TMP6]], double* [[CONV1]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[GB_CASTED]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load float, float* @_ZZ3barssssE2Sb, align 4 +// CHECK1-NEXT: store double [[TMP11]], double* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[GB_CASTED]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load float, float* @_ZZ3barssssE2Sb, align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[SB_CASTED]] to float* -// CHECK1-NEXT: store float [[TMP8]], float* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[SB_CASTED]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load double, double* @Gc, align 8 +// CHECK1-NEXT: store float [[TMP13]], float* [[CONV2]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[SB_CASTED]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load double, double* @Gc, align 8 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[GC_CASTED]] to double* -// CHECK1-NEXT: store double [[TMP10]], double* [[CONV3]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[GC_CASTED]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i16, i16* [[TMP2]], align 2 +// CHECK1-NEXT: store double [[TMP15]], double* [[CONV3]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[GC_CASTED]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP6]], align 2 // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[C_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP12]], i16* [[CONV4]], align 2 -// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[C_CASTED]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load float, float* @_ZZ3barssssE2Sc, align 4 +// CHECK1-NEXT: store i16 [[TMP17]], i16* [[CONV4]], align 2 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[C_CASTED]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load float, float* @_ZZ3barssssE2Sc, align 4 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[SC_CASTED]] to float* -// CHECK1-NEXT: store float [[TMP14]], float* [[CONV5]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[SC_CASTED]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP3]], align 2 +// CHECK1-NEXT: store float [[TMP19]], float* [[CONV5]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[SC_CASTED]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP8]], align 2 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[D_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP16]], i16* [[CONV6]], align 2 -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[D_CASTED]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load double, double* @Gd, align 8 +// CHECK1-NEXT: store i16 [[TMP21]], i16* [[CONV6]], align 2 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, i64* [[D_CASTED]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load double, double* @Gd, align 8 // CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[GD_CASTED]] to double* -// CHECK1-NEXT: store double [[TMP18]], double* [[CONV7]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[GD_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load float, float* @_ZZ3barssssE2Sd, align 4 +// CHECK1-NEXT: store double [[TMP23]], double* [[CONV7]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, i64* [[GD_CASTED]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load float, float* @_ZZ3barssssE2Sd, align 4 // CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[SD_CASTED]] to float* -// CHECK1-NEXT: store float [[TMP20]], float* [[CONV8]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[SD_CASTED]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load double, double* @Ga, align 8 -// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP22]], 0.000000e+00 +// CHECK1-NEXT: store float [[TMP25]], float* [[CONV8]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, i64* [[SD_CASTED]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load double, double* @Ga, align 8 +// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP27]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: land.lhs.true: -// CHECK1-NEXT: [[TMP23:%.*]] = load i16, i16* [[TMP0]], align 2 -// CHECK1-NEXT: [[CONV9:%.*]] = sext i16 [[TMP23]] to i32 +// CHECK1-NEXT: [[TMP28:%.*]] = load i16, i16* [[TMP2]], align 2 +// CHECK1-NEXT: [[CONV9:%.*]] = sext i16 [[TMP28]] to i32 // CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[CONV9]], 0 // CHECK1-NEXT: br i1 [[CMP10]], label [[LAND_LHS_TRUE11:%.*]], label [[OMP_IF_ELSE]] // CHECK1: land.lhs.true11: -// CHECK1-NEXT: [[TMP24:%.*]] = load float, float* @_ZZ3barssssE2Sa, align 4 -// CHECK1-NEXT: [[CONV12:%.*]] = fpext float [[TMP24]] to double +// CHECK1-NEXT: [[TMP29:%.*]] = load float, float* @_ZZ3barssssE2Sa, align 4 +// CHECK1-NEXT: [[CONV12:%.*]] = fpext float [[TMP29]] to double // CHECK1-NEXT: [[CMP13:%.*]] = fcmp ogt double [[CONV12]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64* -// CHECK1-NEXT: store i64 [[TMP5]], i64* [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i64* -// CHECK1-NEXT: store i64 [[TMP5]], i64* [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store i8* null, i8** [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64* -// CHECK1-NEXT: store i64 [[TMP7]], i64* [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP10]], i64* [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i64* -// CHECK1-NEXT: store i64 [[TMP7]], i64* [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store i64 [[TMP10]], i64* [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 // CHECK1-NEXT: store i8* null, i8** [[TMP34]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 // CHECK1-NEXT: [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i64* -// CHECK1-NEXT: store i64 [[TMP9]], i64* [[TMP36]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP12]], i64* [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 // CHECK1-NEXT: [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i64* -// CHECK1-NEXT: store i64 [[TMP9]], i64* [[TMP38]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK1-NEXT: store i64 [[TMP12]], i64* [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 // CHECK1-NEXT: store i8* null, i8** [[TMP39]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 // CHECK1-NEXT: [[TMP41:%.*]] = bitcast i8** [[TMP40]] to i64* -// CHECK1-NEXT: store i64 [[TMP11]], i64* [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP14]], i64* [[TMP41]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 // CHECK1-NEXT: [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64* -// CHECK1-NEXT: store i64 [[TMP11]], i64* [[TMP43]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK1-NEXT: store i64 [[TMP14]], i64* [[TMP43]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 // CHECK1-NEXT: store i8* null, i8** [[TMP44]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 // CHECK1-NEXT: [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i64* -// CHECK1-NEXT: store i64 [[TMP13]], i64* [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK1-NEXT: store i64 [[TMP16]], i64* [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 // CHECK1-NEXT: [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i64* -// CHECK1-NEXT: store i64 [[TMP13]], i64* [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 +// CHECK1-NEXT: store i64 [[TMP16]], i64* [[TMP48]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 // CHECK1-NEXT: store i8* null, i8** [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 // CHECK1-NEXT: [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i64* -// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP51]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5 +// CHECK1-NEXT: store i64 [[TMP18]], i64* [[TMP51]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4 // CHECK1-NEXT: [[TMP53:%.*]] = bitcast i8** [[TMP52]] to i64* -// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP53]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5 +// CHECK1-NEXT: store i64 [[TMP18]], i64* [[TMP53]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 // CHECK1-NEXT: store i8* null, i8** [[TMP54]], align 8 -// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5 // CHECK1-NEXT: [[TMP56:%.*]] = bitcast i8** [[TMP55]] to i64* -// CHECK1-NEXT: store i64 [[TMP17]], i64* [[TMP56]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6 +// CHECK1-NEXT: store i64 [[TMP20]], i64* [[TMP56]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5 // CHECK1-NEXT: [[TMP58:%.*]] = bitcast i8** [[TMP57]] to i64* -// CHECK1-NEXT: store i64 [[TMP17]], i64* [[TMP58]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6 +// CHECK1-NEXT: store i64 [[TMP20]], i64* [[TMP58]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5 // CHECK1-NEXT: store i8* null, i8** [[TMP59]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6 // CHECK1-NEXT: [[TMP61:%.*]] = bitcast i8** [[TMP60]] to i64* -// CHECK1-NEXT: store i64 [[TMP19]], i64* [[TMP61]], align 8 -// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 7 +// CHECK1-NEXT: store i64 [[TMP22]], i64* [[TMP61]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6 // CHECK1-NEXT: [[TMP63:%.*]] = bitcast i8** [[TMP62]] to i64* -// CHECK1-NEXT: store i64 [[TMP19]], i64* [[TMP63]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 7 +// CHECK1-NEXT: store i64 [[TMP22]], i64* [[TMP63]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6 // CHECK1-NEXT: store i8* null, i8** [[TMP64]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8 +// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7 // CHECK1-NEXT: [[TMP66:%.*]] = bitcast i8** [[TMP65]] to i64* -// CHECK1-NEXT: store i64 [[TMP21]], i64* [[TMP66]], align 8 -// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 [[TMP24]], i64* [[TMP66]], align 8 +// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 7 // CHECK1-NEXT: [[TMP68:%.*]] = bitcast i8** [[TMP67]] to i64* -// CHECK1-NEXT: store i64 [[TMP21]], i64* [[TMP68]], align 8 -// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 8 +// CHECK1-NEXT: store i64 [[TMP24]], i64* [[TMP68]], align 8 +// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 7 // CHECK1-NEXT: store i8* null, i8** [[TMP69]], align 8 -// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP72:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94.region_id, i32 9, i8** [[TMP70]], i8** [[TMP71]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_sizes.3, i32 0, i32 0), i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null) -// CHECK1-NEXT: [[TMP73:%.*]] = icmp ne i32 [[TMP72]], 0 -// CHECK1-NEXT: br i1 [[TMP73]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8 +// CHECK1-NEXT: [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64* +// CHECK1-NEXT: store i64 [[TMP26]], i64* [[TMP71]], align 8 +// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 8 +// CHECK1-NEXT: [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i64* +// CHECK1-NEXT: store i64 [[TMP26]], i64* [[TMP73]], align 8 +// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 8 +// CHECK1-NEXT: store i8* null, i8** [[TMP74]], align 8 +// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP77:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94.region_id, i32 9, i8** [[TMP75]], i8** [[TMP76]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_sizes.3, i32 0, i32 0), i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null) +// CHECK1-NEXT: [[TMP78:%.*]] = icmp ne i32 [[TMP77]], 0 +// CHECK1-NEXT: br i1 [[TMP78]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94(i64 [[TMP5]], i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]]) #[[ATTR2]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94(i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP26]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94(i64 [[TMP5]], i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]]) #[[ATTR2]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94(i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP26]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -676,6 +691,7 @@ // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[GD_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SD_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -710,27 +726,33 @@ // CHECK1-NEXT: [[ADD13:%.*]] = fadd double [[CONV12]], 1.000000e+00 // CHECK1-NEXT: [[CONV14:%.*]] = fptrunc double [[ADD13]] to float // CHECK1-NEXT: store float [[CONV14]], float* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load double, double* [[CONV3]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP4]], 0.000000e+00 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i16* [[CONV6]], i16** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store double* [[CONV7]], double** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store float* [[CONV8]], float** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load double, double* [[CONV3]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP7]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: land.lhs.true: -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV4]], align 2 -// CHECK1-NEXT: [[CONV15:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV4]], align 2 +// CHECK1-NEXT: [[CONV15:%.*]] = sext i16 [[TMP8]] to i32 // CHECK1-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0 // CHECK1-NEXT: br i1 [[CMP16]], label [[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]] // CHECK1: land.lhs.true17: -// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[CONV5]], align 4 -// CHECK1-NEXT: [[CONV18:%.*]] = fpext float [[TMP6]] to double +// CHECK1-NEXT: [[TMP9:%.*]] = load float, float* [[CONV5]], align 4 +// CHECK1-NEXT: [[CONV18:%.*]] = fpext float [[TMP9]] to double // CHECK1-NEXT: [[CMP19:%.*]] = fcmp ogt double [[CONV18]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP19]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i16*, double*, float*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i16* [[CONV6]], double* [[CONV7]], float* [[CONV8]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV6]], double* [[CONV7]], float* [[CONV8]]) #[[ATTR2]] +// CHECK1-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: @@ -738,34 +760,34 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[D:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[GD:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SD:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i16*, align 8 -// CHECK1-NEXT: [[GD_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SD_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i16* [[D]], i16** [[D_ADDR]], align 8 -// CHECK1-NEXT: store double* [[GD]], double** [[GD_ADDR]], align 8 -// CHECK1-NEXT: store float* [[SD]], float** [[SD_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16*, i16** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[GD_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load float*, float** [[SD_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP0]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16*, i16** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load float*, float** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i16, i16* [[TMP2]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP7]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV1]], i16* [[TMP0]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load double, double* [[TMP1]], align 8 -// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[TMP4]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD2]], double* [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load float, float* [[TMP2]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = fpext float [[TMP5]] to double +// CHECK1-NEXT: store i16 [[CONV1]], i16* [[TMP2]], align 2 +// CHECK1-NEXT: [[TMP8:%.*]] = load double, double* [[TMP4]], align 8 +// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[TMP8]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD2]], double* [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load float, float* [[TMP6]], align 4 +// CHECK1-NEXT: [[CONV3:%.*]] = fpext float [[TMP9]] to double // CHECK1-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 // CHECK1-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float -// CHECK1-NEXT: store float [[CONV5]], float* [[TMP2]], align 4 +// CHECK1-NEXT: store float [[CONV5]], float* [[TMP6]], align 4 // CHECK1-NEXT: ret void // // @@ -795,46 +817,52 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i16 [[A]], i16* [[A_ADDR]], align 2 // CHECK1-NEXT: store i16 [[B]], i16* [[B_ADDR]], align 2 // CHECK1-NEXT: store i16 [[C]], i16* [[C_ADDR]], align 2 // CHECK1-NEXT: store i16 [[D]], i16* [[D_ADDR]], align 2 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i16*, i16*, i16*, i16*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i16* [[A_ADDR]], i16* [[B_ADDR]], i16* [[C_ADDR]], i16* [[D_ADDR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[A_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[B_ADDR]], align 2 -// CHECK1-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i16* [[A_ADDR]], i16** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i16* [[B_ADDR]], i16** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i16* [[C_ADDR]], i16** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i16* [[D_ADDR]], i16** [[TMP3]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[A_ADDR]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[B_ADDR]], align 2 +// CHECK1-NEXT: [[CONV1:%.*]] = sext i16 [[TMP5]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV1]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[C_ADDR]], align 2 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, i16* [[C_ADDR]], align 2 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP6]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[CONV2]] -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[D_ADDR]], align 2 -// CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i16, i16* [[D_ADDR]], align 2 +// CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP7]] to i32 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CONV4]] -// CHECK1-NEXT: [[TMP4:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sa, align 4 -// CHECK1-NEXT: [[CONV6:%.*]] = fptosi float [[TMP4]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sa, align 4 +// CHECK1-NEXT: [[CONV6:%.*]] = fptosi float [[TMP8]] to i32 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[ADD5]], [[CONV6]] -// CHECK1-NEXT: [[TMP5:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sb, align 4 -// CHECK1-NEXT: [[CONV8:%.*]] = fptosi float [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP9:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sb, align 4 +// CHECK1-NEXT: [[CONV8:%.*]] = fptosi float [[TMP9]] to i32 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[ADD7]], [[CONV8]] -// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sc, align 4 -// CHECK1-NEXT: [[CONV10:%.*]] = fptosi float [[TMP6]] to i32 +// CHECK1-NEXT: [[TMP10:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sc, align 4 +// CHECK1-NEXT: [[CONV10:%.*]] = fptosi float [[TMP10]] to i32 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD9]], [[CONV10]] -// CHECK1-NEXT: [[TMP7:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sd, align 4 -// CHECK1-NEXT: [[CONV12:%.*]] = fptosi float [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP11:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sd, align 4 +// CHECK1-NEXT: [[CONV12:%.*]] = fptosi float [[TMP11]] to i32 // CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD11]], [[CONV12]] // CHECK1-NEXT: ret i32 [[ADD13]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[B:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i16*, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i16*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[GB_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SB_CASTED:%.*]] = alloca i64, align 8 @@ -849,148 +877,150 @@ // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [9 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i16* [[A]], i16** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 8 -// CHECK1-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8 -// CHECK1-NEXT: store i16* [[D]], i16** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16*, i16** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16*, i16** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16*, i16** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i16*, i16** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i16, i16* [[TMP4]], align 2 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP4]], i16* [[CONV]], align 2 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load double, double* @Gb, align 8 +// CHECK1-NEXT: store i16 [[TMP9]], i16* [[CONV]], align 2 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[B_CASTED]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load double, double* @Gb, align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[GB_CASTED]] to double* -// CHECK1-NEXT: store double [[TMP6]], double* [[CONV1]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[GB_CASTED]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sb, align 4 +// CHECK1-NEXT: store double [[TMP11]], double* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[GB_CASTED]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sb, align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[SB_CASTED]] to float* -// CHECK1-NEXT: store float [[TMP8]], float* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[SB_CASTED]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load double, double* @Gc, align 8 +// CHECK1-NEXT: store float [[TMP13]], float* [[CONV2]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[SB_CASTED]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load double, double* @Gc, align 8 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[GC_CASTED]] to double* -// CHECK1-NEXT: store double [[TMP10]], double* [[CONV3]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[GC_CASTED]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i16, i16* [[TMP2]], align 2 +// CHECK1-NEXT: store double [[TMP15]], double* [[CONV3]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[GC_CASTED]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP6]], align 2 // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[C_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP12]], i16* [[CONV4]], align 2 -// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[C_CASTED]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sc, align 4 +// CHECK1-NEXT: store i16 [[TMP17]], i16* [[CONV4]], align 2 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[C_CASTED]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sc, align 4 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[SC_CASTED]] to float* -// CHECK1-NEXT: store float [[TMP14]], float* [[CONV5]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[SC_CASTED]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP3]], align 2 +// CHECK1-NEXT: store float [[TMP19]], float* [[CONV5]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[SC_CASTED]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP8]], align 2 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[D_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP16]], i16* [[CONV6]], align 2 -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[D_CASTED]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load double, double* @Gd, align 8 +// CHECK1-NEXT: store i16 [[TMP21]], i16* [[CONV6]], align 2 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, i64* [[D_CASTED]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load double, double* @Gd, align 8 // CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[GD_CASTED]] to double* -// CHECK1-NEXT: store double [[TMP18]], double* [[CONV7]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[GD_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sd, align 4 +// CHECK1-NEXT: store double [[TMP23]], double* [[CONV7]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, i64* [[GD_CASTED]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sd, align 4 // CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[SD_CASTED]] to float* -// CHECK1-NEXT: store float [[TMP20]], float* [[CONV8]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[SD_CASTED]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load double, double* @Ga, align 8 -// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP22]], 0.000000e+00 +// CHECK1-NEXT: store float [[TMP25]], float* [[CONV8]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, i64* [[SD_CASTED]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load double, double* @Ga, align 8 +// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP27]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: land.lhs.true: -// CHECK1-NEXT: [[TMP23:%.*]] = load i16, i16* [[TMP0]], align 2 -// CHECK1-NEXT: [[CONV9:%.*]] = sext i16 [[TMP23]] to i32 +// CHECK1-NEXT: [[TMP28:%.*]] = load i16, i16* [[TMP2]], align 2 +// CHECK1-NEXT: [[CONV9:%.*]] = sext i16 [[TMP28]] to i32 // CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[CONV9]], 0 // CHECK1-NEXT: br i1 [[CMP10]], label [[LAND_LHS_TRUE11:%.*]], label [[OMP_IF_ELSE]] // CHECK1: land.lhs.true11: -// CHECK1-NEXT: [[TMP24:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sa, align 4 -// CHECK1-NEXT: [[CONV12:%.*]] = fpext float [[TMP24]] to double +// CHECK1-NEXT: [[TMP29:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sa, align 4 +// CHECK1-NEXT: [[CONV12:%.*]] = fpext float [[TMP29]] to double // CHECK1-NEXT: [[CMP13:%.*]] = fcmp ogt double [[CONV12]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64* -// CHECK1-NEXT: store i64 [[TMP5]], i64* [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i64* -// CHECK1-NEXT: store i64 [[TMP5]], i64* [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store i8* null, i8** [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64* -// CHECK1-NEXT: store i64 [[TMP7]], i64* [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP10]], i64* [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i64* -// CHECK1-NEXT: store i64 [[TMP7]], i64* [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store i64 [[TMP10]], i64* [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 // CHECK1-NEXT: store i8* null, i8** [[TMP34]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 // CHECK1-NEXT: [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i64* -// CHECK1-NEXT: store i64 [[TMP9]], i64* [[TMP36]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP12]], i64* [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 // CHECK1-NEXT: [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i64* -// CHECK1-NEXT: store i64 [[TMP9]], i64* [[TMP38]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK1-NEXT: store i64 [[TMP12]], i64* [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 // CHECK1-NEXT: store i8* null, i8** [[TMP39]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 // CHECK1-NEXT: [[TMP41:%.*]] = bitcast i8** [[TMP40]] to i64* -// CHECK1-NEXT: store i64 [[TMP11]], i64* [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP14]], i64* [[TMP41]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 // CHECK1-NEXT: [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64* -// CHECK1-NEXT: store i64 [[TMP11]], i64* [[TMP43]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK1-NEXT: store i64 [[TMP14]], i64* [[TMP43]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 // CHECK1-NEXT: store i8* null, i8** [[TMP44]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 // CHECK1-NEXT: [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i64* -// CHECK1-NEXT: store i64 [[TMP13]], i64* [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK1-NEXT: store i64 [[TMP16]], i64* [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 // CHECK1-NEXT: [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i64* -// CHECK1-NEXT: store i64 [[TMP13]], i64* [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 +// CHECK1-NEXT: store i64 [[TMP16]], i64* [[TMP48]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 // CHECK1-NEXT: store i8* null, i8** [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 // CHECK1-NEXT: [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i64* -// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP51]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5 +// CHECK1-NEXT: store i64 [[TMP18]], i64* [[TMP51]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4 // CHECK1-NEXT: [[TMP53:%.*]] = bitcast i8** [[TMP52]] to i64* -// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP53]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5 +// CHECK1-NEXT: store i64 [[TMP18]], i64* [[TMP53]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 // CHECK1-NEXT: store i8* null, i8** [[TMP54]], align 8 -// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5 // CHECK1-NEXT: [[TMP56:%.*]] = bitcast i8** [[TMP55]] to i64* -// CHECK1-NEXT: store i64 [[TMP17]], i64* [[TMP56]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6 +// CHECK1-NEXT: store i64 [[TMP20]], i64* [[TMP56]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5 // CHECK1-NEXT: [[TMP58:%.*]] = bitcast i8** [[TMP57]] to i64* -// CHECK1-NEXT: store i64 [[TMP17]], i64* [[TMP58]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6 +// CHECK1-NEXT: store i64 [[TMP20]], i64* [[TMP58]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5 // CHECK1-NEXT: store i8* null, i8** [[TMP59]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6 // CHECK1-NEXT: [[TMP61:%.*]] = bitcast i8** [[TMP60]] to i64* -// CHECK1-NEXT: store i64 [[TMP19]], i64* [[TMP61]], align 8 -// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 7 +// CHECK1-NEXT: store i64 [[TMP22]], i64* [[TMP61]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6 // CHECK1-NEXT: [[TMP63:%.*]] = bitcast i8** [[TMP62]] to i64* -// CHECK1-NEXT: store i64 [[TMP19]], i64* [[TMP63]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 7 +// CHECK1-NEXT: store i64 [[TMP22]], i64* [[TMP63]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6 // CHECK1-NEXT: store i8* null, i8** [[TMP64]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8 +// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7 // CHECK1-NEXT: [[TMP66:%.*]] = bitcast i8** [[TMP65]] to i64* -// CHECK1-NEXT: store i64 [[TMP21]], i64* [[TMP66]], align 8 -// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 [[TMP24]], i64* [[TMP66]], align 8 +// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 7 // CHECK1-NEXT: [[TMP68:%.*]] = bitcast i8** [[TMP67]] to i64* -// CHECK1-NEXT: store i64 [[TMP21]], i64* [[TMP68]], align 8 -// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 8 +// CHECK1-NEXT: store i64 [[TMP24]], i64* [[TMP68]], align 8 +// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 7 // CHECK1-NEXT: store i8* null, i8** [[TMP69]], align 8 -// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP72:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145.region_id, i32 9, i8** [[TMP70]], i8** [[TMP71]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_sizes.7, i32 0, i32 0), i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.8, i32 0, i32 0), i8** null, i8** null) -// CHECK1-NEXT: [[TMP73:%.*]] = icmp ne i32 [[TMP72]], 0 -// CHECK1-NEXT: br i1 [[TMP73]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8 +// CHECK1-NEXT: [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64* +// CHECK1-NEXT: store i64 [[TMP26]], i64* [[TMP71]], align 8 +// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 8 +// CHECK1-NEXT: [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i64* +// CHECK1-NEXT: store i64 [[TMP26]], i64* [[TMP73]], align 8 +// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 8 +// CHECK1-NEXT: store i8* null, i8** [[TMP74]], align 8 +// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP77:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145.region_id, i32 9, i8** [[TMP75]], i8** [[TMP76]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_sizes.7, i32 0, i32 0), i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.8, i32 0, i32 0), i8** null, i8** null) +// CHECK1-NEXT: [[TMP78:%.*]] = icmp ne i32 [[TMP77]], 0 +// CHECK1-NEXT: br i1 [[TMP78]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145(i64 [[TMP5]], i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]]) #[[ATTR2]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145(i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP26]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145(i64 [[TMP5]], i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]]) #[[ATTR2]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145(i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP26]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -1008,6 +1038,7 @@ // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[GD_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SD_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -1042,27 +1073,33 @@ // CHECK1-NEXT: [[ADD13:%.*]] = fadd double [[CONV12]], 1.000000e+00 // CHECK1-NEXT: [[CONV14:%.*]] = fptrunc double [[ADD13]] to float // CHECK1-NEXT: store float [[CONV14]], float* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load double, double* [[CONV3]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP4]], 0.000000e+00 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i16* [[CONV6]], i16** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store double* [[CONV7]], double** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store float* [[CONV8]], float** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load double, double* [[CONV3]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP7]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: land.lhs.true: -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV4]], align 2 -// CHECK1-NEXT: [[CONV15:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV4]], align 2 +// CHECK1-NEXT: [[CONV15:%.*]] = sext i16 [[TMP8]] to i32 // CHECK1-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0 // CHECK1-NEXT: br i1 [[CMP16]], label [[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]] // CHECK1: land.lhs.true17: -// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[CONV5]], align 4 -// CHECK1-NEXT: [[CONV18:%.*]] = fpext float [[TMP6]] to double +// CHECK1-NEXT: [[TMP9:%.*]] = load float, float* [[CONV5]], align 4 +// CHECK1-NEXT: [[CONV18:%.*]] = fpext float [[TMP9]] to double // CHECK1-NEXT: [[CMP19:%.*]] = fcmp ogt double [[CONV18]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP19]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i16*, double*, float*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i16* [[CONV6]], double* [[CONV7]], float* [[CONV8]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV6]], double* [[CONV7]], float* [[CONV8]]) #[[ATTR2]] +// CHECK1-NEXT: call void @.omp_outlined..6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: @@ -1070,34 +1107,34 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[D:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[GD:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SD:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i16*, align 8 -// CHECK1-NEXT: [[GD_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SD_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i16* [[D]], i16** [[D_ADDR]], align 8 -// CHECK1-NEXT: store double* [[GD]], double** [[GD_ADDR]], align 8 -// CHECK1-NEXT: store float* [[SD]], float** [[SD_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16*, i16** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[GD_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load float*, float** [[SD_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP0]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16*, i16** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load float*, float** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i16, i16* [[TMP2]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP7]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV1]], i16* [[TMP0]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load double, double* [[TMP1]], align 8 -// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[TMP4]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD2]], double* [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load float, float* [[TMP2]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = fpext float [[TMP5]] to double +// CHECK1-NEXT: store i16 [[CONV1]], i16* [[TMP2]], align 2 +// CHECK1-NEXT: [[TMP8:%.*]] = load double, double* [[TMP4]], align 8 +// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[TMP8]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD2]], double* [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load float, float* [[TMP6]], align 4 +// CHECK1-NEXT: [[CONV3:%.*]] = fpext float [[TMP9]] to double // CHECK1-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 // CHECK1-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float -// CHECK1-NEXT: store float [[CONV5]], float* [[TMP2]], align 4 +// CHECK1-NEXT: store float [[CONV5]], float* [[TMP6]], align 4 // CHECK1-NEXT: ret void // // @@ -1293,6 +1330,7 @@ // CHECK3-NEXT: [[GB6:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[GC7:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[GD8:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -1333,27 +1371,33 @@ // CHECK3-NEXT: [[ADD13:%.*]] = fadd double [[CONV12]], 1.000000e+00 // CHECK3-NEXT: [[CONV14:%.*]] = fptrunc double [[ADD13]] to float // CHECK3-NEXT: store float [[CONV14]], float* [[CONV1]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load double, double* [[GC7]], align 8 -// CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP10]], 0.000000e+00 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i16* [[CONV4]], i16** [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store double* [[GD8]], double** [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store float* [[CONV5]], float** [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load double, double* [[GC7]], align 8 +// CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP13]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: land.lhs.true: -// CHECK3-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV2]], align 2 -// CHECK3-NEXT: [[CONV15:%.*]] = sext i16 [[TMP11]] to i32 +// CHECK3-NEXT: [[TMP14:%.*]] = load i16, i16* [[CONV2]], align 2 +// CHECK3-NEXT: [[CONV15:%.*]] = sext i16 [[TMP14]] to i32 // CHECK3-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0 // CHECK3-NEXT: br i1 [[CMP16]], label [[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]] // CHECK3: land.lhs.true17: -// CHECK3-NEXT: [[TMP12:%.*]] = load float, float* [[CONV3]], align 4 -// CHECK3-NEXT: [[CONV18:%.*]] = fpext float [[TMP12]] to double +// CHECK3-NEXT: [[TMP15:%.*]] = load float, float* [[CONV3]], align 4 +// CHECK3-NEXT: [[CONV18:%.*]] = fpext float [[TMP15]] to double // CHECK3-NEXT: [[CMP19:%.*]] = fcmp ogt double [[CONV18]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP19]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i16*, double*, float*)* @.omp_outlined. to void (i32*, i32*, ...)*), i16* [[CONV4]], double* [[GD8]], float* [[CONV5]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV4]], double* [[GD8]], float* [[CONV5]]) #[[ATTR2]] +// CHECK3-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: @@ -1361,34 +1405,34 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[D:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[GD:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SD:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i16*, align 4 -// CHECK3-NEXT: [[GD_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SD_ADDR:%.*]] = alloca float*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i16* [[D]], i16** [[D_ADDR]], align 4 -// CHECK3-NEXT: store double* [[GD]], double** [[GD_ADDR]], align 4 -// CHECK3-NEXT: store float* [[SD]], float** [[SD_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i16*, i16** [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[GD_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load float*, float** [[SD_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP0]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16*, i16** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load float*, float** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[TMP2]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP7]] to i32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK3-NEXT: store i16 [[CONV1]], i16* [[TMP0]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load double, double* [[TMP1]], align 8 -// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[TMP4]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD2]], double* [[TMP1]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = load float, float* [[TMP2]], align 4 -// CHECK3-NEXT: [[CONV3:%.*]] = fpext float [[TMP5]] to double +// CHECK3-NEXT: store i16 [[CONV1]], i16* [[TMP2]], align 2 +// CHECK3-NEXT: [[TMP8:%.*]] = load double, double* [[TMP4]], align 8 +// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[TMP8]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD2]], double* [[TMP4]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load float, float* [[TMP6]], align 4 +// CHECK3-NEXT: [[CONV3:%.*]] = fpext float [[TMP9]] to double // CHECK3-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 // CHECK3-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float -// CHECK3-NEXT: store float [[CONV5]], float* [[TMP2]], align 4 +// CHECK3-NEXT: store float [[CONV5]], float* [[TMP6]], align 4 // CHECK3-NEXT: ret void // // @@ -1399,46 +1443,52 @@ // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i16 [[A]], i16* [[A_ADDR]], align 2 // CHECK3-NEXT: store i16 [[B]], i16* [[B_ADDR]], align 2 // CHECK3-NEXT: store i16 [[C]], i16* [[C_ADDR]], align 2 // CHECK3-NEXT: store i16 [[D]], i16* [[D_ADDR]], align 2 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i16*, i16*, i16*, i16*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i16* [[A_ADDR]], i16* [[B_ADDR]], i16* [[C_ADDR]], i16* [[D_ADDR]]) -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[A_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[B_ADDR]], align 2 -// CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i16* [[A_ADDR]], i16** [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i16* [[B_ADDR]], i16** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i16* [[C_ADDR]], i16** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store i16* [[D_ADDR]], i16** [[TMP3]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[A_ADDR]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, i16* [[B_ADDR]], align 2 +// CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP5]] to i32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV1]] -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[C_ADDR]], align 2 -// CHECK3-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, i16* [[C_ADDR]], align 2 +// CHECK3-NEXT: [[CONV2:%.*]] = sext i16 [[TMP6]] to i32 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[CONV2]] -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[D_ADDR]], align 2 -// CHECK3-NEXT: [[CONV4:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[D_ADDR]], align 2 +// CHECK3-NEXT: [[CONV4:%.*]] = sext i16 [[TMP7]] to i32 // CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CONV4]] -// CHECK3-NEXT: [[TMP4:%.*]] = load float, float* @_ZZ3barssssE2Sa, align 4 -// CHECK3-NEXT: [[CONV6:%.*]] = fptosi float [[TMP4]] to i32 +// CHECK3-NEXT: [[TMP8:%.*]] = load float, float* @_ZZ3barssssE2Sa, align 4 +// CHECK3-NEXT: [[CONV6:%.*]] = fptosi float [[TMP8]] to i32 // CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[ADD5]], [[CONV6]] -// CHECK3-NEXT: [[TMP5:%.*]] = load float, float* @_ZZ3barssssE2Sb, align 4 -// CHECK3-NEXT: [[CONV8:%.*]] = fptosi float [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP9:%.*]] = load float, float* @_ZZ3barssssE2Sb, align 4 +// CHECK3-NEXT: [[CONV8:%.*]] = fptosi float [[TMP9]] to i32 // CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[ADD7]], [[CONV8]] -// CHECK3-NEXT: [[TMP6:%.*]] = load float, float* @_ZZ3barssssE2Sc, align 4 -// CHECK3-NEXT: [[CONV10:%.*]] = fptosi float [[TMP6]] to i32 +// CHECK3-NEXT: [[TMP10:%.*]] = load float, float* @_ZZ3barssssE2Sc, align 4 +// CHECK3-NEXT: [[CONV10:%.*]] = fptosi float [[TMP10]] to i32 // CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD9]], [[CONV10]] -// CHECK3-NEXT: [[TMP7:%.*]] = load float, float* @_ZZ3barssssE2Sd, align 4 -// CHECK3-NEXT: [[CONV12:%.*]] = fptosi float [[TMP7]] to i32 +// CHECK3-NEXT: [[TMP11:%.*]] = load float, float* @_ZZ3barssssE2Sd, align 4 +// CHECK3-NEXT: [[CONV12:%.*]] = fptosi float [[TMP11]] to i32 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD11]], [[CONV12]] // CHECK3-NEXT: ret i32 [[ADD13]] // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[B:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[D:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i16*, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i16*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[SB_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C_CASTED:%.*]] = alloca i32, align 4 @@ -1450,136 +1500,138 @@ // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [9 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i16* [[A]], i16** [[A_ADDR]], align 4 -// CHECK3-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4 -// CHECK3-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4 -// CHECK3-NEXT: store i16* [[D]], i16** [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i16*, i16** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16*, i16** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16*, i16** [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16*, i16** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16*, i16** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i16*, i16** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i16, i16* [[TMP4]], align 2 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[B_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP4]], i16* [[CONV]], align 2 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load float, float* @_ZZ3barssssE2Sb, align 4 +// CHECK3-NEXT: store i16 [[TMP9]], i16* [[CONV]], align 2 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[B_CASTED]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load float, float* @_ZZ3barssssE2Sb, align 4 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[SB_CASTED]] to float* -// CHECK3-NEXT: store float [[TMP6]], float* [[CONV1]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[SB_CASTED]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[TMP2]], align 2 +// CHECK3-NEXT: store float [[TMP11]], float* [[CONV1]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[SB_CASTED]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i16, i16* [[TMP6]], align 2 // CHECK3-NEXT: [[CONV2:%.*]] = bitcast i32* [[C_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP8]], i16* [[CONV2]], align 2 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[C_CASTED]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load float, float* @_ZZ3barssssE2Sc, align 4 +// CHECK3-NEXT: store i16 [[TMP13]], i16* [[CONV2]], align 2 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[C_CASTED]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load float, float* @_ZZ3barssssE2Sc, align 4 // CHECK3-NEXT: [[CONV3:%.*]] = bitcast i32* [[SC_CASTED]] to float* -// CHECK3-NEXT: store float [[TMP10]], float* [[CONV3]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[SC_CASTED]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i16, i16* [[TMP3]], align 2 +// CHECK3-NEXT: store float [[TMP15]], float* [[CONV3]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[SC_CASTED]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP8]], align 2 // CHECK3-NEXT: [[CONV4:%.*]] = bitcast i32* [[D_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP12]], i16* [[CONV4]], align 2 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[D_CASTED]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load float, float* @_ZZ3barssssE2Sd, align 4 +// CHECK3-NEXT: store i16 [[TMP17]], i16* [[CONV4]], align 2 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[D_CASTED]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load float, float* @_ZZ3barssssE2Sd, align 4 // CHECK3-NEXT: [[CONV5:%.*]] = bitcast i32* [[SD_CASTED]] to float* -// CHECK3-NEXT: store float [[TMP14]], float* [[CONV5]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[SD_CASTED]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load double, double* @Ga, align 8 -// CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP16]], 0.000000e+00 +// CHECK3-NEXT: store float [[TMP19]], float* [[CONV5]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[SD_CASTED]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load double, double* @Ga, align 8 +// CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP21]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: land.lhs.true: -// CHECK3-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP0]], align 2 -// CHECK3-NEXT: [[CONV6:%.*]] = sext i16 [[TMP17]] to i32 +// CHECK3-NEXT: [[TMP22:%.*]] = load i16, i16* [[TMP2]], align 2 +// CHECK3-NEXT: [[CONV6:%.*]] = sext i16 [[TMP22]] to i32 // CHECK3-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[CONV6]], 0 // CHECK3-NEXT: br i1 [[CMP7]], label [[LAND_LHS_TRUE8:%.*]], label [[OMP_IF_ELSE]] // CHECK3: land.lhs.true8: -// CHECK3-NEXT: [[TMP18:%.*]] = load float, float* @_ZZ3barssssE2Sa, align 4 -// CHECK3-NEXT: [[CONV9:%.*]] = fpext float [[TMP18]] to double +// CHECK3-NEXT: [[TMP23:%.*]] = load float, float* @_ZZ3barssssE2Sa, align 4 +// CHECK3-NEXT: [[CONV9:%.*]] = fpext float [[TMP23]] to double // CHECK3-NEXT: [[CMP10:%.*]] = fcmp ogt double [[CONV9]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP10]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32* -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP20]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32* -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP22]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store i8* null, i8** [[TMP23]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to double** -// CHECK3-NEXT: store double* @Gb, double** [[TMP25]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP27:%.*]] = bitcast i8** [[TMP26]] to double** -// CHECK3-NEXT: store double* @Gb, double** [[TMP27]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32* +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32* +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 // CHECK3-NEXT: store i8* null, i8** [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP30:%.*]] = bitcast i8** [[TMP29]] to i32* -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[TMP30]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i32* -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[TMP32]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP30:%.*]] = bitcast i8** [[TMP29]] to double** +// CHECK3-NEXT: store double* @Gb, double** [[TMP30]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP32:%.*]] = bitcast i8** [[TMP31]] to double** +// CHECK3-NEXT: store double* @Gb, double** [[TMP32]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 // CHECK3-NEXT: store i8* null, i8** [[TMP33]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP35:%.*]] = bitcast i8** [[TMP34]] to double** -// CHECK3-NEXT: store double* @Gc, double** [[TMP35]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP37:%.*]] = bitcast i8** [[TMP36]] to double** -// CHECK3-NEXT: store double* @Gc, double** [[TMP37]], align 4 -// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i32* +// CHECK3-NEXT: store i32 [[TMP12]], i32* [[TMP35]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i32* +// CHECK3-NEXT: store i32 [[TMP12]], i32* [[TMP37]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 // CHECK3-NEXT: store i8* null, i8** [[TMP38]], align 4 -// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK3-NEXT: [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32* -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[TMP40]], align 4 -// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK3-NEXT: [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i32* -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[TMP42]], align 4 -// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP40:%.*]] = bitcast i8** [[TMP39]] to double** +// CHECK3-NEXT: store double* @Gc, double** [[TMP40]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP42:%.*]] = bitcast i8** [[TMP41]] to double** +// CHECK3-NEXT: store double* @Gc, double** [[TMP42]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 // CHECK3-NEXT: store i8* null, i8** [[TMP43]], align 4 -// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 // CHECK3-NEXT: [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32* -// CHECK3-NEXT: store i32 [[TMP11]], i32* [[TMP45]], align 4 -// CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[TMP45]], align 4 +// CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4 // CHECK3-NEXT: [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32* -// CHECK3-NEXT: store i32 [[TMP11]], i32* [[TMP47]], align 4 -// CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[TMP47]], align 4 +// CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 // CHECK3-NEXT: store i8* null, i8** [[TMP48]], align 4 -// CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5 // CHECK3-NEXT: [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32* -// CHECK3-NEXT: store i32 [[TMP13]], i32* [[TMP50]], align 4 -// CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[TMP50]], align 4 +// CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5 // CHECK3-NEXT: [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32* -// CHECK3-NEXT: store i32 [[TMP13]], i32* [[TMP52]], align 4 -// CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 6 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[TMP52]], align 4 +// CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5 // CHECK3-NEXT: store i8* null, i8** [[TMP53]], align 4 -// CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7 -// CHECK3-NEXT: [[TMP55:%.*]] = bitcast i8** [[TMP54]] to double** -// CHECK3-NEXT: store double* @Gd, double** [[TMP55]], align 4 -// CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 7 -// CHECK3-NEXT: [[TMP57:%.*]] = bitcast i8** [[TMP56]] to double** -// CHECK3-NEXT: store double* @Gd, double** [[TMP57]], align 4 -// CHECK3-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 7 +// CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP55:%.*]] = bitcast i8** [[TMP54]] to i32* +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[TMP55]], align 4 +// CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP57:%.*]] = bitcast i8** [[TMP56]] to i32* +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[TMP57]], align 4 +// CHECK3-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 6 // CHECK3-NEXT: store i8* null, i8** [[TMP58]], align 4 -// CHECK3-NEXT: [[TMP59:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8 -// CHECK3-NEXT: [[TMP60:%.*]] = bitcast i8** [[TMP59]] to i32* -// CHECK3-NEXT: store i32 [[TMP15]], i32* [[TMP60]], align 4 -// CHECK3-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 8 -// CHECK3-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i32* -// CHECK3-NEXT: store i32 [[TMP15]], i32* [[TMP62]], align 4 -// CHECK3-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 8 +// CHECK3-NEXT: [[TMP59:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7 +// CHECK3-NEXT: [[TMP60:%.*]] = bitcast i8** [[TMP59]] to double** +// CHECK3-NEXT: store double* @Gd, double** [[TMP60]], align 4 +// CHECK3-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 7 +// CHECK3-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to double** +// CHECK3-NEXT: store double* @Gd, double** [[TMP62]], align 4 +// CHECK3-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 7 // CHECK3-NEXT: store i8* null, i8** [[TMP63]], align 4 -// CHECK3-NEXT: [[TMP64:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP66:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94.region_id, i32 9, i8** [[TMP64]], i8** [[TMP65]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_sizes.3, i32 0, i32 0), i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null) -// CHECK3-NEXT: [[TMP67:%.*]] = icmp ne i32 [[TMP66]], 0 -// CHECK3-NEXT: br i1 [[TMP67]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP64:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8 +// CHECK3-NEXT: [[TMP65:%.*]] = bitcast i8** [[TMP64]] to i32* +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[TMP65]], align 4 +// CHECK3-NEXT: [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 8 +// CHECK3-NEXT: [[TMP67:%.*]] = bitcast i8** [[TMP66]] to i32* +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[TMP67]], align 4 +// CHECK3-NEXT: [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 8 +// CHECK3-NEXT: store i8* null, i8** [[TMP68]], align 4 +// CHECK3-NEXT: [[TMP69:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP71:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94.region_id, i32 9, i8** [[TMP69]], i8** [[TMP70]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_sizes.3, i32 0, i32 0), i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null) +// CHECK3-NEXT: [[TMP72:%.*]] = icmp ne i32 [[TMP71]], 0 +// CHECK3-NEXT: br i1 [[TMP72]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94(i32 [[TMP5]], double* @Gb, i32 [[TMP7]], double* @Gc, i32 [[TMP9]], i32 [[TMP11]], i32 [[TMP13]], double* @Gd, i32 [[TMP15]]) #[[ATTR2]] +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94(i32 [[TMP10]], double* @Gb, i32 [[TMP12]], double* @Gc, i32 [[TMP14]], i32 [[TMP16]], i32 [[TMP18]], double* @Gd, i32 [[TMP20]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94(i32 [[TMP5]], double* @Gb, i32 [[TMP7]], double* @Gc, i32 [[TMP9]], i32 [[TMP11]], i32 [[TMP13]], double* @Gd, i32 [[TMP15]]) #[[ATTR2]] +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94(i32 [[TMP10]], double* @Gb, i32 [[TMP12]], double* @Gc, i32 [[TMP14]], i32 [[TMP16]], i32 [[TMP18]], double* @Gd, i32 [[TMP20]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: ret void @@ -1600,6 +1652,7 @@ // CHECK3-NEXT: [[GB6:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[GC7:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[GD8:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -1640,27 +1693,33 @@ // CHECK3-NEXT: [[ADD13:%.*]] = fadd double [[CONV12]], 1.000000e+00 // CHECK3-NEXT: [[CONV14:%.*]] = fptrunc double [[ADD13]] to float // CHECK3-NEXT: store float [[CONV14]], float* [[CONV1]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load double, double* [[GC7]], align 8 -// CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP10]], 0.000000e+00 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i16* [[CONV4]], i16** [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store double* [[GD8]], double** [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store float* [[CONV5]], float** [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load double, double* [[GC7]], align 8 +// CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP13]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: land.lhs.true: -// CHECK3-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV2]], align 2 -// CHECK3-NEXT: [[CONV15:%.*]] = sext i16 [[TMP11]] to i32 +// CHECK3-NEXT: [[TMP14:%.*]] = load i16, i16* [[CONV2]], align 2 +// CHECK3-NEXT: [[CONV15:%.*]] = sext i16 [[TMP14]] to i32 // CHECK3-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0 // CHECK3-NEXT: br i1 [[CMP16]], label [[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]] // CHECK3: land.lhs.true17: -// CHECK3-NEXT: [[TMP12:%.*]] = load float, float* [[CONV3]], align 4 -// CHECK3-NEXT: [[CONV18:%.*]] = fpext float [[TMP12]] to double +// CHECK3-NEXT: [[TMP15:%.*]] = load float, float* [[CONV3]], align 4 +// CHECK3-NEXT: [[CONV18:%.*]] = fpext float [[TMP15]] to double // CHECK3-NEXT: [[CMP19:%.*]] = fcmp ogt double [[CONV18]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP19]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i16*, double*, float*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i16* [[CONV4]], double* [[GD8]], float* [[CONV5]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV4]], double* [[GD8]], float* [[CONV5]]) #[[ATTR2]] +// CHECK3-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: @@ -1668,34 +1727,34 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[D:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[GD:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SD:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i16*, align 4 -// CHECK3-NEXT: [[GD_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SD_ADDR:%.*]] = alloca float*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i16* [[D]], i16** [[D_ADDR]], align 4 -// CHECK3-NEXT: store double* [[GD]], double** [[GD_ADDR]], align 4 -// CHECK3-NEXT: store float* [[SD]], float** [[SD_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i16*, i16** [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[GD_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load float*, float** [[SD_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP0]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16*, i16** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load float*, float** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[TMP2]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP7]] to i32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK3-NEXT: store i16 [[CONV1]], i16* [[TMP0]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load double, double* [[TMP1]], align 8 -// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[TMP4]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD2]], double* [[TMP1]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = load float, float* [[TMP2]], align 4 -// CHECK3-NEXT: [[CONV3:%.*]] = fpext float [[TMP5]] to double +// CHECK3-NEXT: store i16 [[CONV1]], i16* [[TMP2]], align 2 +// CHECK3-NEXT: [[TMP8:%.*]] = load double, double* [[TMP4]], align 8 +// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[TMP8]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD2]], double* [[TMP4]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load float, float* [[TMP6]], align 4 +// CHECK3-NEXT: [[CONV3:%.*]] = fpext float [[TMP9]] to double // CHECK3-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 // CHECK3-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float -// CHECK3-NEXT: store float [[CONV5]], float* [[TMP2]], align 4 +// CHECK3-NEXT: store float [[CONV5]], float* [[TMP6]], align 4 // CHECK3-NEXT: ret void // // @@ -1725,46 +1784,52 @@ // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i16 [[A]], i16* [[A_ADDR]], align 2 // CHECK3-NEXT: store i16 [[B]], i16* [[B_ADDR]], align 2 // CHECK3-NEXT: store i16 [[C]], i16* [[C_ADDR]], align 2 // CHECK3-NEXT: store i16 [[D]], i16* [[D_ADDR]], align 2 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i16*, i16*, i16*, i16*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i16* [[A_ADDR]], i16* [[B_ADDR]], i16* [[C_ADDR]], i16* [[D_ADDR]]) -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[A_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[B_ADDR]], align 2 -// CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i16* [[A_ADDR]], i16** [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i16* [[B_ADDR]], i16** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i16* [[C_ADDR]], i16** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store i16* [[D_ADDR]], i16** [[TMP3]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[A_ADDR]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, i16* [[B_ADDR]], align 2 +// CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP5]] to i32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV1]] -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[C_ADDR]], align 2 -// CHECK3-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, i16* [[C_ADDR]], align 2 +// CHECK3-NEXT: [[CONV2:%.*]] = sext i16 [[TMP6]] to i32 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[CONV2]] -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[D_ADDR]], align 2 -// CHECK3-NEXT: [[CONV4:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[D_ADDR]], align 2 +// CHECK3-NEXT: [[CONV4:%.*]] = sext i16 [[TMP7]] to i32 // CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CONV4]] -// CHECK3-NEXT: [[TMP4:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sa, align 4 -// CHECK3-NEXT: [[CONV6:%.*]] = fptosi float [[TMP4]] to i32 +// CHECK3-NEXT: [[TMP8:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sa, align 4 +// CHECK3-NEXT: [[CONV6:%.*]] = fptosi float [[TMP8]] to i32 // CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[ADD5]], [[CONV6]] -// CHECK3-NEXT: [[TMP5:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sb, align 4 -// CHECK3-NEXT: [[CONV8:%.*]] = fptosi float [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP9:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sb, align 4 +// CHECK3-NEXT: [[CONV8:%.*]] = fptosi float [[TMP9]] to i32 // CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[ADD7]], [[CONV8]] -// CHECK3-NEXT: [[TMP6:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sc, align 4 -// CHECK3-NEXT: [[CONV10:%.*]] = fptosi float [[TMP6]] to i32 +// CHECK3-NEXT: [[TMP10:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sc, align 4 +// CHECK3-NEXT: [[CONV10:%.*]] = fptosi float [[TMP10]] to i32 // CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD9]], [[CONV10]] -// CHECK3-NEXT: [[TMP7:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sd, align 4 -// CHECK3-NEXT: [[CONV12:%.*]] = fptosi float [[TMP7]] to i32 +// CHECK3-NEXT: [[TMP11:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sd, align 4 +// CHECK3-NEXT: [[CONV12:%.*]] = fptosi float [[TMP11]] to i32 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD11]], [[CONV12]] // CHECK3-NEXT: ret i32 [[ADD13]] // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[B:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[D:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i16*, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i16*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[SB_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C_CASTED:%.*]] = alloca i32, align 4 @@ -1776,136 +1841,138 @@ // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [9 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i16* [[A]], i16** [[A_ADDR]], align 4 -// CHECK3-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4 -// CHECK3-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4 -// CHECK3-NEXT: store i16* [[D]], i16** [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i16*, i16** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16*, i16** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16*, i16** [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16*, i16** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16*, i16** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i16*, i16** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i16, i16* [[TMP4]], align 2 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[B_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP4]], i16* [[CONV]], align 2 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sb, align 4 +// CHECK3-NEXT: store i16 [[TMP9]], i16* [[CONV]], align 2 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[B_CASTED]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sb, align 4 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[SB_CASTED]] to float* -// CHECK3-NEXT: store float [[TMP6]], float* [[CONV1]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[SB_CASTED]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[TMP2]], align 2 +// CHECK3-NEXT: store float [[TMP11]], float* [[CONV1]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[SB_CASTED]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i16, i16* [[TMP6]], align 2 // CHECK3-NEXT: [[CONV2:%.*]] = bitcast i32* [[C_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP8]], i16* [[CONV2]], align 2 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[C_CASTED]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sc, align 4 +// CHECK3-NEXT: store i16 [[TMP13]], i16* [[CONV2]], align 2 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[C_CASTED]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sc, align 4 // CHECK3-NEXT: [[CONV3:%.*]] = bitcast i32* [[SC_CASTED]] to float* -// CHECK3-NEXT: store float [[TMP10]], float* [[CONV3]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[SC_CASTED]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i16, i16* [[TMP3]], align 2 +// CHECK3-NEXT: store float [[TMP15]], float* [[CONV3]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[SC_CASTED]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP8]], align 2 // CHECK3-NEXT: [[CONV4:%.*]] = bitcast i32* [[D_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP12]], i16* [[CONV4]], align 2 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[D_CASTED]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sd, align 4 +// CHECK3-NEXT: store i16 [[TMP17]], i16* [[CONV4]], align 2 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[D_CASTED]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sd, align 4 // CHECK3-NEXT: [[CONV5:%.*]] = bitcast i32* [[SD_CASTED]] to float* -// CHECK3-NEXT: store float [[TMP14]], float* [[CONV5]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[SD_CASTED]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load double, double* @Ga, align 8 -// CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP16]], 0.000000e+00 +// CHECK3-NEXT: store float [[TMP19]], float* [[CONV5]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[SD_CASTED]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load double, double* @Ga, align 8 +// CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP21]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: land.lhs.true: -// CHECK3-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP0]], align 2 -// CHECK3-NEXT: [[CONV6:%.*]] = sext i16 [[TMP17]] to i32 +// CHECK3-NEXT: [[TMP22:%.*]] = load i16, i16* [[TMP2]], align 2 +// CHECK3-NEXT: [[CONV6:%.*]] = sext i16 [[TMP22]] to i32 // CHECK3-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[CONV6]], 0 // CHECK3-NEXT: br i1 [[CMP7]], label [[LAND_LHS_TRUE8:%.*]], label [[OMP_IF_ELSE]] // CHECK3: land.lhs.true8: -// CHECK3-NEXT: [[TMP18:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sa, align 4 -// CHECK3-NEXT: [[CONV9:%.*]] = fpext float [[TMP18]] to double +// CHECK3-NEXT: [[TMP23:%.*]] = load float, float* @_ZZ4tbarIsEiT_S0_S0_S0_E2Sa, align 4 +// CHECK3-NEXT: [[CONV9:%.*]] = fpext float [[TMP23]] to double // CHECK3-NEXT: [[CMP10:%.*]] = fcmp ogt double [[CONV9]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP10]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32* -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP20]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32* -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP22]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store i8* null, i8** [[TMP23]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to double** -// CHECK3-NEXT: store double* @Gb, double** [[TMP25]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP27:%.*]] = bitcast i8** [[TMP26]] to double** -// CHECK3-NEXT: store double* @Gb, double** [[TMP27]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32* +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32* +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 // CHECK3-NEXT: store i8* null, i8** [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP30:%.*]] = bitcast i8** [[TMP29]] to i32* -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[TMP30]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i32* -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[TMP32]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP30:%.*]] = bitcast i8** [[TMP29]] to double** +// CHECK3-NEXT: store double* @Gb, double** [[TMP30]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP32:%.*]] = bitcast i8** [[TMP31]] to double** +// CHECK3-NEXT: store double* @Gb, double** [[TMP32]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 // CHECK3-NEXT: store i8* null, i8** [[TMP33]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP35:%.*]] = bitcast i8** [[TMP34]] to double** -// CHECK3-NEXT: store double* @Gc, double** [[TMP35]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP37:%.*]] = bitcast i8** [[TMP36]] to double** -// CHECK3-NEXT: store double* @Gc, double** [[TMP37]], align 4 -// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i32* +// CHECK3-NEXT: store i32 [[TMP12]], i32* [[TMP35]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i32* +// CHECK3-NEXT: store i32 [[TMP12]], i32* [[TMP37]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 // CHECK3-NEXT: store i8* null, i8** [[TMP38]], align 4 -// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK3-NEXT: [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32* -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[TMP40]], align 4 -// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK3-NEXT: [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i32* -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[TMP42]], align 4 -// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP40:%.*]] = bitcast i8** [[TMP39]] to double** +// CHECK3-NEXT: store double* @Gc, double** [[TMP40]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP42:%.*]] = bitcast i8** [[TMP41]] to double** +// CHECK3-NEXT: store double* @Gc, double** [[TMP42]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 // CHECK3-NEXT: store i8* null, i8** [[TMP43]], align 4 -// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 // CHECK3-NEXT: [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32* -// CHECK3-NEXT: store i32 [[TMP11]], i32* [[TMP45]], align 4 -// CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[TMP45]], align 4 +// CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4 // CHECK3-NEXT: [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32* -// CHECK3-NEXT: store i32 [[TMP11]], i32* [[TMP47]], align 4 -// CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[TMP47]], align 4 +// CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 // CHECK3-NEXT: store i8* null, i8** [[TMP48]], align 4 -// CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5 // CHECK3-NEXT: [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32* -// CHECK3-NEXT: store i32 [[TMP13]], i32* [[TMP50]], align 4 -// CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[TMP50]], align 4 +// CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5 // CHECK3-NEXT: [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32* -// CHECK3-NEXT: store i32 [[TMP13]], i32* [[TMP52]], align 4 -// CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 6 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[TMP52]], align 4 +// CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5 // CHECK3-NEXT: store i8* null, i8** [[TMP53]], align 4 -// CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7 -// CHECK3-NEXT: [[TMP55:%.*]] = bitcast i8** [[TMP54]] to double** -// CHECK3-NEXT: store double* @Gd, double** [[TMP55]], align 4 -// CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 7 -// CHECK3-NEXT: [[TMP57:%.*]] = bitcast i8** [[TMP56]] to double** -// CHECK3-NEXT: store double* @Gd, double** [[TMP57]], align 4 -// CHECK3-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 7 +// CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP55:%.*]] = bitcast i8** [[TMP54]] to i32* +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[TMP55]], align 4 +// CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP57:%.*]] = bitcast i8** [[TMP56]] to i32* +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[TMP57]], align 4 +// CHECK3-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 6 // CHECK3-NEXT: store i8* null, i8** [[TMP58]], align 4 -// CHECK3-NEXT: [[TMP59:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8 -// CHECK3-NEXT: [[TMP60:%.*]] = bitcast i8** [[TMP59]] to i32* -// CHECK3-NEXT: store i32 [[TMP15]], i32* [[TMP60]], align 4 -// CHECK3-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 8 -// CHECK3-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i32* -// CHECK3-NEXT: store i32 [[TMP15]], i32* [[TMP62]], align 4 -// CHECK3-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 8 +// CHECK3-NEXT: [[TMP59:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7 +// CHECK3-NEXT: [[TMP60:%.*]] = bitcast i8** [[TMP59]] to double** +// CHECK3-NEXT: store double* @Gd, double** [[TMP60]], align 4 +// CHECK3-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 7 +// CHECK3-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to double** +// CHECK3-NEXT: store double* @Gd, double** [[TMP62]], align 4 +// CHECK3-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 7 // CHECK3-NEXT: store i8* null, i8** [[TMP63]], align 4 -// CHECK3-NEXT: [[TMP64:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP66:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145.region_id, i32 9, i8** [[TMP64]], i8** [[TMP65]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_sizes.7, i32 0, i32 0), i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.8, i32 0, i32 0), i8** null, i8** null) -// CHECK3-NEXT: [[TMP67:%.*]] = icmp ne i32 [[TMP66]], 0 -// CHECK3-NEXT: br i1 [[TMP67]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP64:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8 +// CHECK3-NEXT: [[TMP65:%.*]] = bitcast i8** [[TMP64]] to i32* +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[TMP65]], align 4 +// CHECK3-NEXT: [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 8 +// CHECK3-NEXT: [[TMP67:%.*]] = bitcast i8** [[TMP66]] to i32* +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[TMP67]], align 4 +// CHECK3-NEXT: [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 8 +// CHECK3-NEXT: store i8* null, i8** [[TMP68]], align 4 +// CHECK3-NEXT: [[TMP69:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP71:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145.region_id, i32 9, i8** [[TMP69]], i8** [[TMP70]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_sizes.7, i32 0, i32 0), i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.8, i32 0, i32 0), i8** null, i8** null) +// CHECK3-NEXT: [[TMP72:%.*]] = icmp ne i32 [[TMP71]], 0 +// CHECK3-NEXT: br i1 [[TMP72]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145(i32 [[TMP5]], double* @Gb, i32 [[TMP7]], double* @Gc, i32 [[TMP9]], i32 [[TMP11]], i32 [[TMP13]], double* @Gd, i32 [[TMP15]]) #[[ATTR2]] +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145(i32 [[TMP10]], double* @Gb, i32 [[TMP12]], double* @Gc, i32 [[TMP14]], i32 [[TMP16]], i32 [[TMP18]], double* @Gd, i32 [[TMP20]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145(i32 [[TMP5]], double* @Gb, i32 [[TMP7]], double* @Gc, i32 [[TMP9]], i32 [[TMP11]], i32 [[TMP13]], double* @Gd, i32 [[TMP15]]) #[[ATTR2]] +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145(i32 [[TMP10]], double* @Gb, i32 [[TMP12]], double* @Gc, i32 [[TMP14]], i32 [[TMP16]], i32 [[TMP18]], double* @Gd, i32 [[TMP20]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: ret void @@ -1926,6 +1993,7 @@ // CHECK3-NEXT: [[GB6:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[GC7:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[GD8:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -1966,27 +2034,33 @@ // CHECK3-NEXT: [[ADD13:%.*]] = fadd double [[CONV12]], 1.000000e+00 // CHECK3-NEXT: [[CONV14:%.*]] = fptrunc double [[ADD13]] to float // CHECK3-NEXT: store float [[CONV14]], float* [[CONV1]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load double, double* [[GC7]], align 8 -// CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP10]], 0.000000e+00 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i16* [[CONV4]], i16** [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store double* [[GD8]], double** [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store float* [[CONV5]], float** [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load double, double* [[GC7]], align 8 +// CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP13]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: land.lhs.true: -// CHECK3-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV2]], align 2 -// CHECK3-NEXT: [[CONV15:%.*]] = sext i16 [[TMP11]] to i32 +// CHECK3-NEXT: [[TMP14:%.*]] = load i16, i16* [[CONV2]], align 2 +// CHECK3-NEXT: [[CONV15:%.*]] = sext i16 [[TMP14]] to i32 // CHECK3-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0 // CHECK3-NEXT: br i1 [[CMP16]], label [[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]] // CHECK3: land.lhs.true17: -// CHECK3-NEXT: [[TMP12:%.*]] = load float, float* [[CONV3]], align 4 -// CHECK3-NEXT: [[CONV18:%.*]] = fpext float [[TMP12]] to double +// CHECK3-NEXT: [[TMP15:%.*]] = load float, float* [[CONV3]], align 4 +// CHECK3-NEXT: [[CONV18:%.*]] = fpext float [[TMP15]] to double // CHECK3-NEXT: [[CMP19:%.*]] = fcmp ogt double [[CONV18]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP19]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i16*, double*, float*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i16* [[CONV4]], double* [[GD8]], float* [[CONV5]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV4]], double* [[GD8]], float* [[CONV5]]) #[[ATTR2]] +// CHECK3-NEXT: call void @.omp_outlined..6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: @@ -1994,34 +2068,34 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[D:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[GD:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SD:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i16*, align 4 -// CHECK3-NEXT: [[GD_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SD_ADDR:%.*]] = alloca float*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i16* [[D]], i16** [[D_ADDR]], align 4 -// CHECK3-NEXT: store double* [[GD]], double** [[GD_ADDR]], align 4 -// CHECK3-NEXT: store float* [[SD]], float** [[SD_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i16*, i16** [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[GD_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load float*, float** [[SD_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP0]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16*, i16** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load float*, float** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[TMP2]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP7]] to i32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK3-NEXT: store i16 [[CONV1]], i16* [[TMP0]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load double, double* [[TMP1]], align 8 -// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[TMP4]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD2]], double* [[TMP1]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = load float, float* [[TMP2]], align 4 -// CHECK3-NEXT: [[CONV3:%.*]] = fpext float [[TMP5]] to double +// CHECK3-NEXT: store i16 [[CONV1]], i16* [[TMP2]], align 2 +// CHECK3-NEXT: [[TMP8:%.*]] = load double, double* [[TMP4]], align 8 +// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[TMP8]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD2]], double* [[TMP4]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load float, float* [[TMP6]], align 4 +// CHECK3-NEXT: [[CONV3:%.*]] = fpext float [[TMP9]] to double // CHECK3-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 // CHECK3-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float -// CHECK3-NEXT: store float [[CONV5]], float* [[TMP2]], align 4 +// CHECK3-NEXT: store float [[CONV5]], float* [[TMP6]], align 4 // CHECK3-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_map_codegen_03.cpp b/clang/test/OpenMP/target_map_codegen_03.cpp --- a/clang/test/OpenMP/target_map_codegen_03.cpp +++ b/clang/test/OpenMP/target_map_codegen_03.cpp @@ -62,46 +62,51 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[I]], align 4 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[I]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[I]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[I_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[I_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[I_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i64* -// CHECK1-NEXT: store i64 [[TMP2]], i64* [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[I_CASTED]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8** [[TMP5]] to i64* -// CHECK1-NEXT: store i64 [[TMP2]], i64* [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store i8* null, i8** [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28implicit_maps_nested_integeri_l48.region_id, i32 1, i8** [[TMP8]], i8** [[TMP9]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0) -// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK1-NEXT: br i1 [[TMP11]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64* +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store i8* null, i8** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28implicit_maps_nested_integeri_l48.region_id, i32 1, i8** [[TMP10]], i8** [[TMP11]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0) +// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: br i1 [[TMP13]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28implicit_maps_nested_integeri_l48(i64 [[TMP2]]) #[[ATTR3:[0-9]+]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28implicit_maps_nested_integeri_l48(i64 [[TMP4]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: ret void @@ -111,25 +116,30 @@ // CHECK1-SAME: (i64 noundef [[I:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i64 [[I]], i64* [[I_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[I_ADDR]] to i32* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -145,45 +155,50 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP0]], i32* [[I]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[I]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[I]], i32** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[I_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x i8*], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x i8*], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[I_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[I_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32* -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP3]], i32* [[I_CASTED]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[I_CASTED]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8** [[TMP5]] to i32* -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store i8* null, i8** [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28implicit_maps_nested_integeri_l48.region_id, i32 1, i8** [[TMP8]], i8** [[TMP9]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0) -// CHECK3-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK3-NEXT: br i1 [[TMP11]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32* +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store i8* null, i8** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP12:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28implicit_maps_nested_integeri_l48.region_id, i32 1, i8** [[TMP10]], i8** [[TMP11]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0) +// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK3-NEXT: br i1 [[TMP13]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28implicit_maps_nested_integeri_l48(i32 [[TMP2]]) #[[ATTR3:[0-9]+]] +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28implicit_maps_nested_integeri_l48(i32 [[TMP4]]) #[[ATTR3:[0-9]+]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: ret void @@ -193,24 +208,29 @@ // CHECK3-SAME: (i32 noundef [[I:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32 [[I]], i32* [[I_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[I_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[I_ADDR]], i32** [[TMP0]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK3-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK3-NEXT: store i32 [[INC]], i32* [[TMP2]], align 4 // CHECK3-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_parallel_codegen.cpp b/clang/test/OpenMP/target_parallel_codegen.cpp --- a/clang/test/OpenMP/target_parallel_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_codegen.cpp @@ -303,7 +303,7 @@ // CHECK1-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 // CHECK1-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x i8*], align 8 @@ -519,17 +519,21 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l100 // CHECK1-SAME: () #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -541,7 +545,7 @@ // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 @@ -552,7 +556,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) @@ -563,8 +567,8 @@ // CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !21 // CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !21 // CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !21 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !21 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !21 +// CHECK1-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !21 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !21 // CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l100.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 1, i32 0, i32 0, i8* null, i32 0, i8* null) #[[ATTR4]] // CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK1-NEXT: br i1 [[TMP12]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] @@ -579,30 +583,33 @@ // CHECK1-SAME: (i64 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[A]], align 4 // CHECK1-NEXT: ret void // // @@ -610,39 +617,42 @@ // CHECK1-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 2 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK1-NEXT: store i16 [[TMP1]], i16* [[TMP0]], align 2 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK1-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK1-NEXT: br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK1-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK1-NEXT: store i16 [[CONV1]], i16* [[AA]], align 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1) +// CHECK1-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK1-NEXT: br i1 [[TMP7]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: -// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP5]]) // CHECK1-NEXT: br label [[DOTCANCEL_CONTINUE]] // CHECK1: .cancel.continue: // CHECK1-NEXT: ret void @@ -653,45 +663,47 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK1-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 +// CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK1-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 // CHECK1-NEXT: ret void // // @@ -707,7 +719,7 @@ // CHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -726,84 +738,95 @@ // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 9, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, [10 x float]*, i64, float*, [5 x [10 x double]]*, i64, i64, double*, %struct.TT*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP9]], [10 x float]* [[TMP0]], i64 [[TMP1]], float* [[TMP2]], [5 x [10 x double]]* [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], double* [[TMP6]], %struct.TT* [[TMP7]]) +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store [10 x float]* [[TMP0]], [10 x float]** [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[TMP2]], float** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store [5 x [10 x double]]* [[TMP3]], [5 x [10 x double]]** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: store i64 [[TMP5]], i64* [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK1-NEXT: store double* [[TMP6]], double** [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK1-NEXT: store %struct.TT* [[TMP7]], %struct.TT** [[TMP17]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8 -// CHECK1-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8 -// CHECK1-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double -// CHECK1-NEXT: [[ADD6:%.*]] = fadd double [[CONV5]], 1.000000e+00 -// CHECK1-NEXT: [[CONV7:%.*]] = fptrunc double [[ADD6]] to float -// CHECK1-NEXT: store float [[CONV7]], float* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK1-NEXT: [[TMP10:%.*]] = load float, float* [[ARRAYIDX8]], align 4 -// CHECK1-NEXT: [[CONV9:%.*]] = fpext float [[TMP10]] to double -// CHECK1-NEXT: [[ADD10:%.*]] = fadd double [[CONV9]], 1.000000e+00 -// CHECK1-NEXT: [[CONV11:%.*]] = fptrunc double [[ADD10]] to float -// CHECK1-NEXT: store float [[CONV11]], float* [[ARRAYIDX8]], align 4 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX12]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP11:%.*]] = load double, double* [[ARRAYIDX13]], align 8 -// CHECK1-NEXT: [[ADD14:%.*]] = fadd double [[TMP11]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD14]], double* [[ARRAYIDX13]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP12]] -// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3 -// CHECK1-NEXT: [[TMP13:%.*]] = load double, double* [[ARRAYIDX16]], align 8 -// CHECK1-NEXT: [[ADD17:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD17]], double* [[ARRAYIDX16]], align 8 -// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[X]], align 8 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i64 [[TMP14]], 1 -// CHECK1-NEXT: store i64 [[ADD18]], i64* [[X]], align 8 -// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP15:%.*]] = load i8, i8* [[Y]], align 8 -// CHECK1-NEXT: [[CONV19:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[CONV19]], 1 -// CHECK1-NEXT: [[CONV21:%.*]] = trunc i32 [[ADD20]] to i8 -// CHECK1-NEXT: store i8 [[CONV21]], i8* [[Y]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load [10 x float]*, [10 x float]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP16:%.*]] = load double*, double** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load %struct.TT*, %struct.TT** [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double +// CHECK1-NEXT: [[ADD1:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK1-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD1]] to float +// CHECK1-NEXT: store float [[CONV2]], float* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 3 +// CHECK1-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK1-NEXT: [[CONV4:%.*]] = fpext float [[TMP21]] to double +// CHECK1-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.000000e+00 +// CHECK1-NEXT: [[CONV6:%.*]] = fptrunc double [[ADD5]] to float +// CHECK1-NEXT: store float [[CONV6]], float* [[ARRAYIDX3]], align 4 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP10]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX7]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX8]], align 8 +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD9]], double* [[ARRAYIDX8]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP16]], i64 [[TMP23]] +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX10]], i64 3 +// CHECK1-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX11]], align 8 +// CHECK1-NEXT: [[ADD12:%.*]] = fadd double [[TMP24]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD12]], double* [[ARRAYIDX11]], align 8 +// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP25]], 1 +// CHECK1-NEXT: store i64 [[ADD13]], i64* [[X]], align 8 +// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8 +// CHECK1-NEXT: [[CONV14:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 +// CHECK1-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i8 +// CHECK1-NEXT: store i8 [[CONV16]], i8* [[Y]], align 8 // CHECK1-NEXT: ret void // // @@ -1108,7 +1131,7 @@ // CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -1119,50 +1142,57 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.S1* [[TMP0]], %struct.S1** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store i16* [[TMP3]], i16** [[TMP9]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP4]] to double -// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i16*, i16** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[B]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP11]] to double +// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double [[ADD]], double* [[A]], align 8 -// CHECK1-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = load double, double* [[A4]], align 8 -// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 -// CHECK1-NEXT: store double [[INC]], double* [[A4]], align 8 -// CHECK1-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK1-NEXT: [[TMP6:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP6]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 -// CHECK1-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2 +// CHECK1-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP12:%.*]] = load double, double* [[A1]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP12]], 1.000000e+00 +// CHECK1-NEXT: store double [[INC]], double* [[A1]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = fptosi double [[INC]] to i16 +// CHECK1-NEXT: [[TMP13:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i64 [[TMP13]] +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// CHECK1-NEXT: store i16 [[CONV2]], i16* [[ARRAYIDX3]], align 2 // CHECK1-NEXT: ret void // // @@ -1173,9 +1203,7 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 @@ -1184,58 +1212,62 @@ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 -// CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* -// CHECK1-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV2]], align 1 +// CHECK1-NEXT: store i8 [[TMP6]], i8* [[TMP5]], align 2 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP7]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 -// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK1-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 2 +// CHECK1-NEXT: store i8 [[TMP6]], i8* [[AAA]], align 1 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK1-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[AAA]], align 1 +// CHECK1-NEXT: [[CONV3:%.*]] = sext i8 [[TMP11]] to i32 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 -// CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK1-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 -// CHECK1-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] to i32 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 -// CHECK1-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK1-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 +// CHECK1-NEXT: store i8 [[CONV5]], i8* [[AAA]], align 1 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP8]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: ret void // // @@ -1245,54 +1277,57 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP5]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK1-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: ret void // // @@ -1315,7 +1350,7 @@ // CHECK3-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 // CHECK3-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x i8*], align 4 @@ -1528,17 +1563,21 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l100 // CHECK3-SAME: () #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -1550,7 +1589,7 @@ // CHECK3-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 4 // CHECK3-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 4 // CHECK3-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 4 // CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 @@ -1561,7 +1600,7 @@ // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK3-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* // CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META13:![0-9]+]]) // CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) @@ -1572,8 +1611,8 @@ // CHECK3-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !22 // CHECK3-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !22 // CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !22 -// CHECK3-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !22 -// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !22 +// CHECK3-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 4, !noalias !22 +// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 4, !noalias !22 // CHECK3-NEXT: [[TMP11:%.*]] = call i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l100.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 1, i32 0, i32 0, i8* null, i32 0, i8* null) #[[ATTR4]] // CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK3-NEXT: br i1 [[TMP12]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] @@ -1588,27 +1627,32 @@ // CHECK3-SAME: (i32 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[A]], align 4 // CHECK3-NEXT: ret void // // @@ -1616,39 +1660,42 @@ // CHECK3-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 2 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK3-NEXT: store i16 [[TMP1]], i16* [[TMP0]], align 2 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 -// CHECK3-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK3-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK3-NEXT: br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK3-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK3-NEXT: store i16 [[CONV1]], i16* [[AA]], align 2 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1) +// CHECK3-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK3-NEXT: br i1 [[TMP7]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK3: .cancel.exit: -// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP5]]) // CHECK3-NEXT: br label [[DOTCANCEL_CONTINUE]] // CHECK3: .cancel.continue: // CHECK3-NEXT: ret void @@ -1659,42 +1706,46 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32 [[TMP1]], i32 [[TMP3]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK3-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 -// CHECK3-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK3-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 +// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK3-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK3-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 // CHECK3-NEXT: ret void // // @@ -1710,7 +1761,7 @@ // CHECK3-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -1728,82 +1779,95 @@ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 9, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [10 x float]*, i32, float*, [5 x [10 x double]]*, i32, i32, double*, %struct.TT*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP9]], [10 x float]* [[TMP0]], i32 [[TMP1]], float* [[TMP2]], [5 x [10 x double]]* [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], double* [[TMP6]], %struct.TT* [[TMP7]]) +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store [10 x float]* [[TMP0]], [10 x float]** [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[TMP2]], float** [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store [5 x [10 x double]]* [[TMP3]], [5 x [10 x double]]** [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK3-NEXT: store double* [[TMP6]], double** [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK3-NEXT: store %struct.TT* [[TMP7]], %struct.TT** [[TMP17]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4 -// CHECK3-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4 -// CHECK3-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP9]] to double -// CHECK3-NEXT: [[ADD5:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK3-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load [10 x float]*, [10 x float]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 7 +// CHECK3-NEXT: [[TMP16:%.*]] = load double*, double** [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load %struct.TT*, %struct.TT** [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double +// CHECK3-NEXT: [[ADD1:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK3-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD1]] to float +// CHECK3-NEXT: store float [[CONV2]], float* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP8]], i32 3 +// CHECK3-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK3-NEXT: [[CONV4:%.*]] = fpext float [[TMP21]] to double +// CHECK3-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.000000e+00 // CHECK3-NEXT: [[CONV6:%.*]] = fptrunc double [[ADD5]] to float -// CHECK3-NEXT: store float [[CONV6]], float* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK3-NEXT: [[TMP10:%.*]] = load float, float* [[ARRAYIDX7]], align 4 -// CHECK3-NEXT: [[CONV8:%.*]] = fpext float [[TMP10]] to double -// CHECK3-NEXT: [[ADD9:%.*]] = fadd double [[CONV8]], 1.000000e+00 -// CHECK3-NEXT: [[CONV10:%.*]] = fptrunc double [[ADD9]] to float -// CHECK3-NEXT: store float [[CONV10]], float* [[ARRAYIDX7]], align 4 -// CHECK3-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX11]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP11:%.*]] = load double, double* [[ARRAYIDX12]], align 8 -// CHECK3-NEXT: [[ADD13:%.*]] = fadd double [[TMP11]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD13]], double* [[ARRAYIDX12]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP12]] -// CHECK3-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX14]], i32 3 -// CHECK3-NEXT: [[TMP13:%.*]] = load double, double* [[ARRAYIDX15]], align 8 -// CHECK3-NEXT: [[ADD16:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8 -// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP14:%.*]] = load i64, i64* [[X]], align 4 -// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP14]], 1 -// CHECK3-NEXT: store i64 [[ADD17]], i64* [[X]], align 4 -// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP15:%.*]] = load i8, i8* [[Y]], align 4 -// CHECK3-NEXT: [[CONV18:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK3-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV18]], 1 -// CHECK3-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 -// CHECK3-NEXT: store i8 [[CONV20]], i8* [[Y]], align 4 +// CHECK3-NEXT: store float [[CONV6]], float* [[ARRAYIDX3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP10]], i32 0, i32 1 +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX7]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX8]], align 8 +// CHECK3-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD9]], double* [[ARRAYIDX8]], align 8 +// CHECK3-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP16]], i32 [[TMP23]] +// CHECK3-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX10]], i32 3 +// CHECK3-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX11]], align 8 +// CHECK3-NEXT: [[ADD12:%.*]] = fadd double [[TMP24]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD12]], double* [[ARRAYIDX11]], align 8 +// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP18]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4 +// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP25]], 1 +// CHECK3-NEXT: store i64 [[ADD13]], i64* [[X]], align 4 +// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP18]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4 +// CHECK3-NEXT: [[CONV14:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK3-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 +// CHECK3-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i8 +// CHECK3-NEXT: store i8 [[CONV16]], i8* [[Y]], align 4 // CHECK3-NEXT: ret void // // @@ -2105,7 +2169,7 @@ // CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -2115,48 +2179,57 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.S1* [[TMP0]], %struct.S1** [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store i16* [[TMP3]], i16** [[TMP9]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to double +// CHECK3-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i16*, i16** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[B]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP11]] to double // CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double [[ADD]], double* [[A]], align 4 -// CHECK3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = load double, double* [[A3]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 -// CHECK3-NEXT: store double [[INC]], double* [[A3]], align 4 -// CHECK3-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 -// CHECK3-NEXT: [[TMP6:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP6]] -// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 -// CHECK3-NEXT: store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2 +// CHECK3-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP12:%.*]] = load double, double* [[A1]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP12]], 1.000000e+00 +// CHECK3-NEXT: store double [[INC]], double* [[A1]], align 4 +// CHECK3-NEXT: [[CONV2:%.*]] = fptosi double [[INC]] to i16 +// CHECK3-NEXT: [[TMP13:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i32 [[TMP13]] +// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// CHECK3-NEXT: store i16 [[CONV2]], i16* [[ARRAYIDX3]], align 2 // CHECK3-NEXT: ret void // // @@ -2167,9 +2240,7 @@ // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 @@ -2177,56 +2248,62 @@ // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* // CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 -// CHECK3-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* -// CHECK3-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV1]], align 1 +// CHECK3-NEXT: store i8 [[TMP6]], i8* [[TMP5]], align 2 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP7]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 -// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 -// CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK3-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 -// CHECK3-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 -// CHECK3-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 -// CHECK3-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK3-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 2 +// CHECK3-NEXT: store i8 [[TMP6]], i8* [[AAA]], align 1 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK3-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK3-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 +// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[AAA]], align 1 +// CHECK3-NEXT: [[CONV3:%.*]] = sext i8 [[TMP11]] to i32 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 +// CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 +// CHECK3-NEXT: store i8 [[CONV5]], i8* [[AAA]], align 1 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP8]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: ret void // // @@ -2236,51 +2313,56 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP5]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 -// CHECK3-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK3-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK3-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK3-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: ret void // // @@ -2294,17 +2376,21 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l100 // CHECK9-SAME: () #[[ATTR0:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -2312,39 +2398,42 @@ // CHECK9-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK9-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK9-NEXT: store i16 [[TMP1]], i16* [[TMP0]], align 2 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK9-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK9-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK9-NEXT: br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK9-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK9-NEXT: store i16 [[CONV1]], i16* [[AA]], align 2 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1) +// CHECK9-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK9-NEXT: br i1 [[TMP7]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK9: .cancel.exit: -// CHECK9-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP5]]) // CHECK9-NEXT: br label [[DOTCANCEL_CONTINUE]] // CHECK9: .cancel.continue: // CHECK9-NEXT: ret void @@ -2355,45 +2444,47 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK9-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP3]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK9-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK9-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 +// CHECK9-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK9-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 // CHECK9-NEXT: ret void // // @@ -2409,7 +2500,7 @@ // CHECK9-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 // CHECK9-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK9-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -2428,84 +2519,95 @@ // CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK9-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 9, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, [10 x float]*, i64, float*, [5 x [10 x double]]*, i64, i64, double*, %struct.TT*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], [10 x float]* [[TMP0]], i64 [[TMP1]], float* [[TMP2]], [5 x [10 x double]]* [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], double* [[TMP6]], %struct.TT* [[TMP7]]) +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store [10 x float]* [[TMP0]], [10 x float]** [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], i64* [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store float* [[TMP2]], float** [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store [5 x [10 x double]]* [[TMP3]], [5 x [10 x double]]** [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: store i64 [[TMP5]], i64* [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK9-NEXT: store double* [[TMP6]], double** [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK9-NEXT: store %struct.TT* [[TMP7]], %struct.TT** [[TMP17]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 -// CHECK9-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK9-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8 -// CHECK9-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8 -// CHECK9-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8 -// CHECK9-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double -// CHECK9-NEXT: [[ADD6:%.*]] = fadd double [[CONV5]], 1.000000e+00 -// CHECK9-NEXT: [[CONV7:%.*]] = fptrunc double [[ADD6]] to float -// CHECK9-NEXT: store float [[CONV7]], float* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK9-NEXT: [[TMP10:%.*]] = load float, float* [[ARRAYIDX8]], align 4 -// CHECK9-NEXT: [[CONV9:%.*]] = fpext float [[TMP10]] to double -// CHECK9-NEXT: [[ADD10:%.*]] = fadd double [[CONV9]], 1.000000e+00 -// CHECK9-NEXT: [[CONV11:%.*]] = fptrunc double [[ADD10]] to float -// CHECK9-NEXT: store float [[CONV11]], float* [[ARRAYIDX8]], align 4 -// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 -// CHECK9-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX12]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP11:%.*]] = load double, double* [[ARRAYIDX13]], align 8 -// CHECK9-NEXT: [[ADD14:%.*]] = fadd double [[TMP11]], 1.000000e+00 -// CHECK9-NEXT: store double [[ADD14]], double* [[ARRAYIDX13]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK9-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP12]] -// CHECK9-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3 -// CHECK9-NEXT: [[TMP13:%.*]] = load double, double* [[ARRAYIDX16]], align 8 -// CHECK9-NEXT: [[ADD17:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK9-NEXT: store double [[ADD17]], double* [[ARRAYIDX16]], align 8 -// CHECK9-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[X]], align 8 -// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i64 [[TMP14]], 1 -// CHECK9-NEXT: store i64 [[ADD18]], i64* [[X]], align 8 -// CHECK9-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP15:%.*]] = load i8, i8* [[Y]], align 8 -// CHECK9-NEXT: [[CONV19:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK9-NEXT: [[ADD20:%.*]] = add nsw i32 [[CONV19]], 1 -// CHECK9-NEXT: [[CONV21:%.*]] = trunc i32 [[ADD20]] to i8 -// CHECK9-NEXT: store i8 [[CONV21]], i8* [[Y]], align 8 +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load [10 x float]*, [10 x float]** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 7 +// CHECK9-NEXT: [[TMP16:%.*]] = load double*, double** [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load %struct.TT*, %struct.TT** [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double +// CHECK9-NEXT: [[ADD1:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK9-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD1]] to float +// CHECK9-NEXT: store float [[CONV2]], float* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 3 +// CHECK9-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK9-NEXT: [[CONV4:%.*]] = fpext float [[TMP21]] to double +// CHECK9-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.000000e+00 +// CHECK9-NEXT: [[CONV6:%.*]] = fptrunc double [[ADD5]] to float +// CHECK9-NEXT: store float [[CONV6]], float* [[ARRAYIDX3]], align 4 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP10]], i64 0, i64 1 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX7]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX8]], align 8 +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], 1.000000e+00 +// CHECK9-NEXT: store double [[ADD9]], double* [[ARRAYIDX8]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP16]], i64 [[TMP23]] +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX10]], i64 3 +// CHECK9-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX11]], align 8 +// CHECK9-NEXT: [[ADD12:%.*]] = fadd double [[TMP24]], 1.000000e+00 +// CHECK9-NEXT: store double [[ADD12]], double* [[ARRAYIDX11]], align 8 +// CHECK9-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP18]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8 +// CHECK9-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP25]], 1 +// CHECK9-NEXT: store i64 [[ADD13]], i64* [[X]], align 8 +// CHECK9-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP18]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8 +// CHECK9-NEXT: [[CONV14:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK9-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 +// CHECK9-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i8 +// CHECK9-NEXT: store i8 [[CONV16]], i8* [[Y]], align 8 // CHECK9-NEXT: ret void // // @@ -2516,9 +2618,7 @@ // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 @@ -2527,58 +2627,62 @@ // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK9-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 -// CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* -// CHECK9-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK9-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV2]], align 1 +// CHECK9-NEXT: store i8 [[TMP6]], i8* [[TMP5]], align 2 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP7]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK9-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK9-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 2 +// CHECK9-NEXT: store i8 [[TMP6]], i8* [[AAA]], align 1 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK9-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK9-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[AAA]], align 1 +// CHECK9-NEXT: [[CONV3:%.*]] = sext i8 [[TMP11]] to i32 // CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 -// CHECK9-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK9-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 -// CHECK9-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] to i32 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK9-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK9-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 +// CHECK9-NEXT: store i8 [[CONV5]], i8* [[AAA]], align 1 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP8]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: ret void // // @@ -2590,7 +2694,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -2601,50 +2705,57 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]]) +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store %struct.S1* [[TMP0]], %struct.S1** [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], i64* [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP2]], i64* [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i16* [[TMP3]], i16** [[TMP9]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK9-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP4]] to double -// CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i16*, i16** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[B]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP11]] to double +// CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK9-NEXT: store double [[ADD]], double* [[A]], align 8 -// CHECK9-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP5:%.*]] = load double, double* [[A4]], align 8 -// CHECK9-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 -// CHECK9-NEXT: store double [[INC]], double* [[A4]], align 8 -// CHECK9-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK9-NEXT: [[TMP6:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP6]] -// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 -// CHECK9-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2 +// CHECK9-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP12:%.*]] = load double, double* [[A1]], align 8 +// CHECK9-NEXT: [[INC:%.*]] = fadd double [[TMP12]], 1.000000e+00 +// CHECK9-NEXT: store double [[INC]], double* [[A1]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = fptosi double [[INC]] to i16 +// CHECK9-NEXT: [[TMP13:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i64 [[TMP13]] +// CHECK9-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// CHECK9-NEXT: store i16 [[CONV2]], i16* [[ARRAYIDX3]], align 2 // CHECK9-NEXT: ret void // // @@ -2654,71 +2765,78 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK9-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK9-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP5]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK9-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 -// CHECK9-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// CHECK9-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK9-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l100 // CHECK11-SAME: () #[[ATTR0:[0-9]+]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -2726,39 +2844,42 @@ // CHECK11-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK11-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP1]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK11-NEXT: store i16 [[TMP1]], i16* [[TMP0]], align 2 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 -// CHECK11-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK11-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK11-NEXT: br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK11-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK11-NEXT: store i16 [[CONV1]], i16* [[AA]], align 2 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1) +// CHECK11-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK11-NEXT: br i1 [[TMP7]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK11: .cancel.exit: -// CHECK11-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP5]]) // CHECK11-NEXT: br label [[DOTCANCEL_CONTINUE]] // CHECK11: .cancel.continue: // CHECK11-NEXT: ret void @@ -2769,42 +2890,46 @@ // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK11-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32 [[TMP1]], i32 [[TMP3]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK11-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK11-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 -// CHECK11-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK11-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 +// CHECK11-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK11-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK11-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 // CHECK11-NEXT: ret void // // @@ -2820,7 +2945,7 @@ // CHECK11-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 // CHECK11-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -2838,82 +2963,95 @@ // CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 9, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [10 x float]*, i32, float*, [5 x [10 x double]]*, i32, i32, double*, %struct.TT*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP9]], [10 x float]* [[TMP0]], i32 [[TMP1]], float* [[TMP2]], [5 x [10 x double]]* [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], double* [[TMP6]], %struct.TT* [[TMP7]]) +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store [10 x float]* [[TMP0]], [10 x float]** [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP1]], i32* [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store float* [[TMP2]], float** [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store [5 x [10 x double]]* [[TMP3]], [5 x [10 x double]]** [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK11-NEXT: store double* [[TMP6]], double** [[TMP16]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK11-NEXT: store %struct.TT* [[TMP7]], %struct.TT** [[TMP17]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 -// CHECK11-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4 -// CHECK11-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4 -// CHECK11-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4 -// CHECK11-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK11-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = fpext float [[TMP9]] to double -// CHECK11-NEXT: [[ADD5:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load [10 x float]*, [10 x float]** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 7 +// CHECK11-NEXT: [[TMP16:%.*]] = load double*, double** [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load %struct.TT*, %struct.TT** [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double +// CHECK11-NEXT: [[ADD1:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK11-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD1]] to float +// CHECK11-NEXT: store float [[CONV2]], float* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP8]], i32 3 +// CHECK11-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK11-NEXT: [[CONV4:%.*]] = fpext float [[TMP21]] to double +// CHECK11-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.000000e+00 // CHECK11-NEXT: [[CONV6:%.*]] = fptrunc double [[ADD5]] to float -// CHECK11-NEXT: store float [[CONV6]], float* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK11-NEXT: [[TMP10:%.*]] = load float, float* [[ARRAYIDX7]], align 4 -// CHECK11-NEXT: [[CONV8:%.*]] = fpext float [[TMP10]] to double -// CHECK11-NEXT: [[ADD9:%.*]] = fadd double [[CONV8]], 1.000000e+00 -// CHECK11-NEXT: [[CONV10:%.*]] = fptrunc double [[ADD9]] to float -// CHECK11-NEXT: store float [[CONV10]], float* [[ARRAYIDX7]], align 4 -// CHECK11-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 -// CHECK11-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX11]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP11:%.*]] = load double, double* [[ARRAYIDX12]], align 8 -// CHECK11-NEXT: [[ADD13:%.*]] = fadd double [[TMP11]], 1.000000e+00 -// CHECK11-NEXT: store double [[ADD13]], double* [[ARRAYIDX12]], align 8 -// CHECK11-NEXT: [[TMP12:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK11-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP12]] -// CHECK11-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX14]], i32 3 -// CHECK11-NEXT: [[TMP13:%.*]] = load double, double* [[ARRAYIDX15]], align 8 -// CHECK11-NEXT: [[ADD16:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK11-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8 -// CHECK11-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, i64* [[X]], align 4 -// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP14]], 1 -// CHECK11-NEXT: store i64 [[ADD17]], i64* [[X]], align 4 -// CHECK11-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK11-NEXT: [[TMP15:%.*]] = load i8, i8* [[Y]], align 4 -// CHECK11-NEXT: [[CONV18:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK11-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV18]], 1 -// CHECK11-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 -// CHECK11-NEXT: store i8 [[CONV20]], i8* [[Y]], align 4 +// CHECK11-NEXT: store float [[CONV6]], float* [[ARRAYIDX3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP10]], i32 0, i32 1 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX7]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX8]], align 8 +// CHECK11-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], 1.000000e+00 +// CHECK11-NEXT: store double [[ADD9]], double* [[ARRAYIDX8]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP16]], i32 [[TMP23]] +// CHECK11-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX10]], i32 3 +// CHECK11-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX11]], align 8 +// CHECK11-NEXT: [[ADD12:%.*]] = fadd double [[TMP24]], 1.000000e+00 +// CHECK11-NEXT: store double [[ADD12]], double* [[ARRAYIDX11]], align 8 +// CHECK11-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP18]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4 +// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP25]], 1 +// CHECK11-NEXT: store i64 [[ADD13]], i64* [[X]], align 4 +// CHECK11-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP18]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4 +// CHECK11-NEXT: [[CONV14:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK11-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 +// CHECK11-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i8 +// CHECK11-NEXT: store i8 [[CONV16]], i8* [[Y]], align 4 // CHECK11-NEXT: ret void // // @@ -2924,9 +3062,7 @@ // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 @@ -2934,56 +3070,62 @@ // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK11-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 -// CHECK11-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* -// CHECK11-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK11-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV1]], align 1 +// CHECK11-NEXT: store i8 [[TMP6]], i8* [[TMP5]], align 2 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP7]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK11-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK11-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 -// CHECK11-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK11-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 -// CHECK11-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 -// CHECK11-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 -// CHECK11-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK11-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK11-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 2 +// CHECK11-NEXT: store i8 [[TMP6]], i8* [[AAA]], align 1 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK11-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK11-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK11-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 +// CHECK11-NEXT: [[TMP11:%.*]] = load i8, i8* [[AAA]], align 1 +// CHECK11-NEXT: [[CONV3:%.*]] = sext i8 [[TMP11]] to i32 +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 +// CHECK11-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 +// CHECK11-NEXT: store i8 [[CONV5]], i8* [[AAA]], align 1 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP8]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: ret void // // @@ -2995,7 +3137,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -3005,48 +3147,57 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]]) +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store %struct.S1* [[TMP0]], %struct.S1** [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP1]], i32* [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i16* [[TMP3]], i16** [[TMP9]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to double +// CHECK11-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i16*, i16** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[B]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP11]] to double // CHECK11-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK11-NEXT: store double [[ADD]], double* [[A]], align 4 -// CHECK11-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP5:%.*]] = load double, double* [[A3]], align 4 -// CHECK11-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 -// CHECK11-NEXT: store double [[INC]], double* [[A3]], align 4 -// CHECK11-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 -// CHECK11-NEXT: [[TMP6:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP6]] -// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 -// CHECK11-NEXT: store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2 +// CHECK11-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP12:%.*]] = load double, double* [[A1]], align 4 +// CHECK11-NEXT: [[INC:%.*]] = fadd double [[TMP12]], 1.000000e+00 +// CHECK11-NEXT: store double [[INC]], double* [[A1]], align 4 +// CHECK11-NEXT: [[CONV2:%.*]] = fptosi double [[INC]] to i16 +// CHECK11-NEXT: [[TMP13:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i32 [[TMP13]] +// CHECK11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// CHECK11-NEXT: store i16 [[CONV2]], i16* [[ARRAYIDX3]], align 2 // CHECK11-NEXT: ret void // // @@ -3056,50 +3207,55 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK11-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK11-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK11-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP5]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK11-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 -// CHECK11-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK11-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 -// CHECK11-NEXT: store i32 [[ADD4]], i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// CHECK11-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK11-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK11-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: ret void // diff --git a/clang/test/OpenMP/target_parallel_debug_codegen.cpp b/clang/test/OpenMP/target_parallel_debug_codegen.cpp --- a/clang/test/OpenMP/target_parallel_debug_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_debug_codegen.cpp @@ -74,8 +74,8 @@ // CHECK1-NEXT: [[TMP:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca [10 x [10 x i32]]*, align 8 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK1-NEXT: store [10 x [10 x [10 x i32]]] addrspace(1)* [[C]], [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8 // CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], metadata [[META39:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40:![0-9]+]] // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 @@ -100,192 +100,143 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG47]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG48:![0-9]+]] -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*, !dbg [[DBG48]] -// CHECK1-NEXT: store i32 [[TMP10]], i32* [[CONV]], align 4, !dbg [[DBG48]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[A_CASTED]], align 8, !dbg [[DBG48]] -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG48]] -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast [10 x [10 x [10 x i32]]]* [[TMP2]] to i8*, !dbg [[DBG48]] -// CHECK1-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !dbg [[DBG48]] -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG48]] -// CHECK1-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP11]] to i8*, !dbg [[DBG48]] -// CHECK1-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 8, !dbg [[DBG48]] -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG48]] -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast [10 x [10 x i32]]* [[TMP4]] to i8*, !dbg [[DBG48]] -// CHECK1-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8, !dbg [[DBG48]] -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG48]] -// CHECK1-NEXT: store i8* [[TMP7]], i8** [[TMP18]], align 8, !dbg [[DBG48]] -// CHECK1-NEXT: [[TMP19:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**, !dbg [[DBG48]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, [10 x [10 x [10 x i32]]]*, i64, [10 x [10 x i32]]*, i8*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP19]], i64 4), !dbg [[DBG48]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB5:[0-9]+]], i8 2, i1 true), !dbg [[DBG49:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG51:![0-9]+]] +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG48:![0-9]+]] +// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[TMP2]], [10 x [10 x [10 x i32]]]** [[TMP10]], align 8, !dbg [[DBG48]] +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG48]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG49:![0-9]+]] +// CHECK1-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 8, !dbg [[DBG48]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG48]] +// CHECK1-NEXT: store [10 x [10 x i32]]* [[TMP4]], [10 x [10 x i32]]** [[TMP13]], align 8, !dbg [[DBG48]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG48]] +// CHECK1-NEXT: store i8* [[TMP7]], i8** [[TMP14]], align 8, !dbg [[DBG48]] +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast %struct.anon* [[DOTTMP_OUTLINED_AGG_ARG]] to i8*, !dbg [[DBG48]] +// CHECK1-NEXT: [[TMP16:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 32), !dbg [[DBG48]] +// CHECK1-NEXT: [[TMP17:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP15]], i8* [[TMP16]]), !dbg [[DBG48]] +// CHECK1-NEXT: [[TMP18:%.*]] = load [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], align 8, !dbg [[DBG48]] +// CHECK1-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP17]] to %struct.anon*, !dbg [[DBG48]] +// CHECK1-NEXT: store [[STRUCT_ANON]] [[TMP18]], %struct.anon* [[TMP19]], align 8, !dbg [[DBG48]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon*)* @__omp_outlined__ to i8*), i8* null, i8* [[TMP17]]), !dbg [[DBG48]] +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP16]], i64 32), !dbg [[DBG48]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB5:[0-9]+]], i8 2, i1 true), !dbg [[DBG51:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG52:![0-9]+]] // CHECK1: worker.exit: // CHECK1-NEXT: ret void, !dbg [[DBG47]] // // -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined___debug__ -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [10 x [10 x i32]]] addrspace(1)* noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], [10 x [10 x i32]]* noalias noundef [[B:%.*]], i8 addrspace(1)* noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG52:![0-9]+]] { +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] !dbg [[DBG53:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]] addrspace(1)*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca i8 addrspace(1)*, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK1-NEXT: [[_TMP2:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[B3:%.*]] = alloca [10 x [10 x i32]], align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca [10 x [10 x i32]], align 4 // CHECK1-NEXT: [[F:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[G:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[H:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META59:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META64:![0-9]+]], metadata !DIExpression()), !dbg [[DBG65:![0-9]+]] // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META61:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60]] -// CHECK1-NEXT: store [10 x [10 x [10 x i32]]] addrspace(1)* [[C]], [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], metadata [[META62:![0-9]+]], metadata !DIExpression()), !dbg [[DBG63:![0-9]+]] -// CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META64:![0-9]+]], metadata !DIExpression()), !dbg [[DBG65:![0-9]+]] -// CHECK1-NEXT: store [10 x [10 x i32]]* [[B]], [10 x [10 x i32]]** [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]]** [[B_ADDR]], metadata [[META66:![0-9]+]], metadata !DIExpression()), !dbg [[DBG67:![0-9]+]] -// CHECK1-NEXT: store i8 addrspace(1)* [[BB]], i8 addrspace(1)** [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8 addrspace(1)** [[BB_ADDR]], metadata [[META68:![0-9]+]], metadata !DIExpression()), !dbg [[DBG69:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]] addrspace(1)*, [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8, !dbg [[DBG70:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* [[TMP0]] to [10 x [10 x [10 x i32]]]*, !dbg [[DBG70]] -// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[TMP1]], [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG70]] -// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG70]] -// CHECK1-NEXT: [[TMP3:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG70]] -// CHECK1-NEXT: store [10 x [10 x i32]]* [[TMP3]], [10 x [10 x i32]]** [[_TMP1]], align 8, !dbg [[DBG70]] -// CHECK1-NEXT: [[TMP4:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[_TMP1]], align 8, !dbg [[DBG70]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)** [[BB_ADDR]], align 8, !dbg [[DBG70]] -// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast i8 addrspace(1)* [[TMP5]] to i8*, !dbg [[DBG70]] -// CHECK1-NEXT: store i8* [[TMP6]], i8** [[_TMP2]], align 8, !dbg [[DBG70]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[_TMP2]], align 8, !dbg [[DBG70]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]]* [[B3]], metadata [[META71:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60]] -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast [10 x [10 x i32]]* [[B3]] to i8*, !dbg [[DBG70]] -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [10 x [10 x i32]]* [[TMP4]] to i8*, !dbg [[DBG70]] -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 400, i1 false), !dbg [[DBG70]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[F]], metadata [[META72:![0-9]+]], metadata !DIExpression()), !dbg [[DBG75:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 1, !dbg [[DBG76:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG76]] -// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX4]], i64 0, i64 1, !dbg [[DBG76]] -// CHECK1-NEXT: store i32* [[ARRAYIDX5]], i32** [[F]], align 8, !dbg [[DBG75]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[G]], metadata [[META77:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78:![0-9]+]] -// CHECK1-NEXT: store i32* [[A_ADDR]], i32** [[G]], align 8, !dbg [[DBG78]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[H]], metadata [[META79:![0-9]+]], metadata !DIExpression()), !dbg [[DBG80:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[B3]], i64 0, i64 1, !dbg [[DBG81:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX6]], i64 0, i64 1, !dbg [[DBG81]] -// CHECK1-NEXT: store i32* [[ARRAYIDX7]], i32** [[H]], align 8, !dbg [[DBG80]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[D]], metadata [[META82:![0-9]+]], metadata !DIExpression()), !dbg [[DBG83:![0-9]+]] -// CHECK1-NEXT: store i32 15, i32* [[D]], align 4, !dbg [[DBG83]] -// CHECK1-NEXT: store i32 5, i32* [[A_ADDR]], align 4, !dbg [[DBG84:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[B3]], i64 0, i64 0, !dbg [[DBG85:![0-9]+]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG86:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64, !dbg [[DBG85]] -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX8]], i64 0, i64 [[IDXPROM]], !dbg [[DBG85]] -// CHECK1-NEXT: store i32 10, i32* [[ARRAYIDX9]], align 4, !dbg [[DBG87:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 0, !dbg [[DBG88:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX10]], i64 0, i64 0, !dbg [[DBG88]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG89:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP11]] to i64, !dbg [[DBG88]] -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX11]], i64 0, i64 [[IDXPROM12]], !dbg [[DBG88]] -// CHECK1-NEXT: store i32 11, i32* [[ARRAYIDX13]], align 4, !dbg [[DBG90:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 0, !dbg [[DBG91:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX14]], i64 0, i64 0, !dbg [[DBG91]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG92:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP12]] to i64, !dbg [[DBG91]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META66:![0-9]+]], metadata !DIExpression()), !dbg [[DBG65]] +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata %struct.anon** [[__CONTEXT_ADDR]], metadata [[META67:![0-9]+]], metadata !DIExpression()), !dbg [[DBG65]] +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8, !dbg [[DBG68:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0, !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[TMP1]], align 8, !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1, !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8, !dbg [[DBG68]] +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[A]], align 4, !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2, !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP6:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[TMP5]], align 8, !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3, !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[TMP7]], align 8, !dbg [[DBG68]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]]* [[B]], metadata [[META69:![0-9]+]], metadata !DIExpression()), !dbg [[DBG65]] +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [10 x [10 x i32]]* [[B]] to i8*, !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast [10 x [10 x i32]]* [[TMP6]] to i8*, !dbg [[DBG68]] +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP9]], i8* align 4 [[TMP10]], i64 400, i1 false), !dbg [[DBG68]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[F]], metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG73:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 1, !dbg [[DBG74:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG74]] +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX1]], i64 0, i64 1, !dbg [[DBG74]] +// CHECK1-NEXT: store i32* [[ARRAYIDX2]], i32** [[F]], align 8, !dbg [[DBG73]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[G]], metadata [[META75:![0-9]+]], metadata !DIExpression()), !dbg [[DBG76:![0-9]+]] +// CHECK1-NEXT: store i32* [[A]], i32** [[G]], align 8, !dbg [[DBG76]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[H]], metadata [[META77:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[B]], i64 0, i64 1, !dbg [[DBG79:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX3]], i64 0, i64 1, !dbg [[DBG79]] +// CHECK1-NEXT: store i32* [[ARRAYIDX4]], i32** [[H]], align 8, !dbg [[DBG78]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[D]], metadata [[META80:![0-9]+]], metadata !DIExpression()), !dbg [[DBG81:![0-9]+]] +// CHECK1-NEXT: store i32 15, i32* [[D]], align 4, !dbg [[DBG81]] +// CHECK1-NEXT: store i32 5, i32* [[A]], align 4, !dbg [[DBG82:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[B]], i64 0, i64 0, !dbg [[DBG83:![0-9]+]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG84:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64, !dbg [[DBG83]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX5]], i64 0, i64 [[IDXPROM]], !dbg [[DBG83]] +// CHECK1-NEXT: store i32 10, i32* [[ARRAYIDX6]], align 4, !dbg [[DBG85:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 0, !dbg [[DBG86:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX7]], i64 0, i64 0, !dbg [[DBG86]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG87:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP12]] to i64, !dbg [[DBG86]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX8]], i64 0, i64 [[IDXPROM9]], !dbg [[DBG86]] +// CHECK1-NEXT: store i32 11, i32* [[ARRAYIDX10]], align 4, !dbg [[DBG88:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 0, !dbg [[DBG89:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX11]], i64 0, i64 0, !dbg [[DBG89]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG90:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG89]] +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX12]], i64 0, i64 [[IDXPROM13]], !dbg [[DBG89]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX14]], align 4, !dbg [[DBG89]] +// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[B]], i64 0, i64 0, !dbg [[DBG91:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG92:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP15]] to i64, !dbg [[DBG91]] // CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX15]], i64 0, i64 [[IDXPROM16]], !dbg [[DBG91]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX17]], align 4, !dbg [[DBG91]] -// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[B3]], i64 0, i64 0, !dbg [[DBG93:![0-9]+]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG94:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP14]] to i64, !dbg [[DBG93]] -// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG93]] -// CHECK1-NEXT: store i32 [[TMP13]], i32* [[ARRAYIDX20]], align 4, !dbg [[DBG95:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[B3]], i64 0, i64 0, !dbg [[DBG96:![0-9]+]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG97:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP15]] to i64, !dbg [[DBG96]] -// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG96]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[ARRAYIDX23]], align 4, !dbg [[DBG96]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i8, i8* [[TMP7]], align 1, !dbg [[DBG98:![0-9]+]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP17]] to i1, !dbg [[DBG98]] -// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG98]] -// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP16]], !dbg [[DBG98]] -// CHECK1-NEXT: [[TOBOOL24:%.*]] = icmp ne i32 [[OR]], 0, !dbg [[DBG98]] -// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL24]] to i8, !dbg [[DBG98]] -// CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[TMP7]], align 1, !dbg [[DBG98]] -// CHECK1-NEXT: ret void, !dbg [[DBG99:![0-9]+]] -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [10 x [10 x i32]]]* noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[B:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG100:![0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META107:![0-9]+]], metadata !DIExpression()), !dbg [[DBG108:![0-9]+]] -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META109:![0-9]+]], metadata !DIExpression()), !dbg [[DBG108]] -// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[C]], [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]]** [[C_ADDR]], metadata [[META110:![0-9]+]], metadata !DIExpression()), !dbg [[DBG108]] -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i64* [[A_ADDR]], metadata [[META111:![0-9]+]], metadata !DIExpression()), !dbg [[DBG108]] -// CHECK1-NEXT: store [10 x [10 x i32]]* [[B]], [10 x [10 x i32]]** [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]]** [[B_ADDR]], metadata [[META112:![0-9]+]], metadata !DIExpression()), !dbg [[DBG108]] -// CHECK1-NEXT: store i8* [[BB]], i8** [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8** [[BB_ADDR]], metadata [[META113:![0-9]+]], metadata !DIExpression()), !dbg [[DBG108]] -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG114:![0-9]+]] -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*, !dbg [[DBG114]] -// CHECK1-NEXT: [[TMP1:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG114]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG114]] -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG114]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG114]] -// CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG114]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 4, !dbg [[DBG114]] -// CHECK1-NEXT: [[TMP7:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG114]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG114]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP5]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG114]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast i8* [[TMP8]] to i8 addrspace(1)*, !dbg [[DBG114]] -// CHECK1-NEXT: call void @__omp_outlined___debug__(i32* [[TMP3]], i32* [[TMP4]], [10 x [10 x [10 x i32]]] addrspace(1)* [[TMP9]], i32 [[TMP6]], [10 x [10 x i32]]* [[TMP7]], i8 addrspace(1)* [[TMP10]]) #[[ATTR3:[0-9]+]], !dbg [[DBG114]] -// CHECK1-NEXT: ret void, !dbg [[DBG114]] +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX17]], align 4, !dbg [[DBG93:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[B]], i64 0, i64 0, !dbg [[DBG94:![0-9]+]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG95:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP16]] to i64, !dbg [[DBG94]] +// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG94]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX20]], align 4, !dbg [[DBG94]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, i8* [[TMP8]], align 1, !dbg [[DBG96:![0-9]+]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP18]] to i1, !dbg [[DBG96]] +// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG96]] +// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP17]], !dbg [[DBG96]] +// CHECK1-NEXT: [[TOBOOL21:%.*]] = icmp ne i32 [[OR]], 0, !dbg [[DBG96]] +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL21]] to i8, !dbg [[DBG96]] +// CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[TMP8]], align 1, !dbg [[DBG96]] +// CHECK1-NEXT: ret void, !dbg [[DBG97:![0-9]+]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23 -// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[B:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5:[0-9]+]] !dbg [[DBG115:![0-9]+]] { +// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[B:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR7:[0-9]+]] !dbg [[DBG98:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 // CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[C]], [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]]** [[C_ADDR]], metadata [[META118:![0-9]+]], metadata !DIExpression()), !dbg [[DBG119:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]]** [[C_ADDR]], metadata [[META105:![0-9]+]], metadata !DIExpression()), !dbg [[DBG106:![0-9]+]] // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i64* [[A_ADDR]], metadata [[META120:![0-9]+]], metadata !DIExpression()), !dbg [[DBG119]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i64* [[A_ADDR]], metadata [[META107:![0-9]+]], metadata !DIExpression()), !dbg [[DBG106]] // CHECK1-NEXT: store [10 x [10 x i32]]* [[B]], [10 x [10 x i32]]** [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]]** [[B_ADDR]], metadata [[META121:![0-9]+]], metadata !DIExpression()), !dbg [[DBG119]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]]** [[B_ADDR]], metadata [[META108:![0-9]+]], metadata !DIExpression()), !dbg [[DBG106]] // CHECK1-NEXT: store i8* [[BB]], i8** [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8** [[BB_ADDR]], metadata [[META122:![0-9]+]], metadata !DIExpression()), !dbg [[DBG119]] -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG123:![0-9]+]] -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*, !dbg [[DBG123]] -// CHECK1-NEXT: [[TMP1:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG123]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG123]] -// CHECK1-NEXT: [[TMP3:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG123]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4, !dbg [[DBG123]] -// CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG123]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG123]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP3]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG123]] -// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast i8* [[TMP6]] to i8 addrspace(1)*, !dbg [[DBG123]] -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_debug__([10 x [10 x [10 x i32]]] addrspace(1)* [[TMP7]], i32 [[TMP4]], [10 x [10 x i32]]* [[TMP5]], i8 addrspace(1)* [[TMP8]]) #[[ATTR3]], !dbg [[DBG123]] -// CHECK1-NEXT: ret void, !dbg [[DBG123]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8** [[BB_ADDR]], metadata [[META109:![0-9]+]], metadata !DIExpression()), !dbg [[DBG106]] +// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG110:![0-9]+]] +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*, !dbg [[DBG110]] +// CHECK1-NEXT: [[TMP1:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG110]] +// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG110]] +// CHECK1-NEXT: [[TMP3:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG110]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4, !dbg [[DBG110]] +// CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG110]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG110]] +// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP3]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG110]] +// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast i8* [[TMP6]] to i8 addrspace(1)*, !dbg [[DBG110]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_debug__([10 x [10 x [10 x i32]]] addrspace(1)* [[TMP7]], i32 [[TMP4]], [10 x [10 x i32]]* [[TMP5]], i8 addrspace(1)* [[TMP8]]) #[[ATTR4:[0-9]+]], !dbg [[DBG110]] +// CHECK1-NEXT: ret void, !dbg [[DBG110]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug__ -// CHECK1-SAME: ([10 x [10 x [10 x i32]]] addrspace(1)* noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], [10 x [10 x i32]] addrspace(1)* noalias noundef [[B:%.*]], i8 addrspace(1)* noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG124:![0-9]+]] { +// CHECK1-SAME: ([10 x [10 x [10 x i32]]] addrspace(1)* noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], [10 x [10 x i32]] addrspace(1)* noalias noundef [[B:%.*]], i8 addrspace(1)* noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG111:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]] addrspace(1)*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 @@ -294,209 +245,158 @@ // CHECK1-NEXT: [[TMP:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca [10 x [10 x i32]]*, align 8 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: store [10 x [10 x [10 x i32]]] addrspace(1)* [[C]], [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], metadata [[META129:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], metadata [[META116:![0-9]+]], metadata !DIExpression()), !dbg [[DBG117:![0-9]+]] // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META131:![0-9]+]], metadata !DIExpression()), !dbg [[DBG132:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META118:![0-9]+]], metadata !DIExpression()), !dbg [[DBG119:![0-9]+]] // CHECK1-NEXT: store [10 x [10 x i32]] addrspace(1)* [[B]], [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], metadata [[META133:![0-9]+]], metadata !DIExpression()), !dbg [[DBG134:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], metadata [[META120:![0-9]+]], metadata !DIExpression()), !dbg [[DBG121:![0-9]+]] // CHECK1-NEXT: store i8 addrspace(1)* [[BB]], i8 addrspace(1)** [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8 addrspace(1)** [[BB_ADDR]], metadata [[META135:![0-9]+]], metadata !DIExpression()), !dbg [[DBG136:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]] addrspace(1)*, [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8, !dbg [[DBG137:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* [[TMP0]] to [10 x [10 x [10 x i32]]]*, !dbg [[DBG137]] -// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[TMP1]], [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG137]] -// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG137]] -// CHECK1-NEXT: [[TMP3:%.*]] = load [10 x [10 x i32]] addrspace(1)*, [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], align 8, !dbg [[DBG137]] -// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast [10 x [10 x i32]] addrspace(1)* [[TMP3]] to [10 x [10 x i32]]*, !dbg [[DBG137]] -// CHECK1-NEXT: store [10 x [10 x i32]]* [[TMP4]], [10 x [10 x i32]]** [[_TMP1]], align 8, !dbg [[DBG137]] -// CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[_TMP1]], align 8, !dbg [[DBG137]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)** [[BB_ADDR]], align 8, !dbg [[DBG137]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast i8 addrspace(1)* [[TMP6]] to i8*, !dbg [[DBG137]] -// CHECK1-NEXT: store i8* [[TMP7]], i8** [[_TMP2]], align 8, !dbg [[DBG137]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[_TMP2]], align 8, !dbg [[DBG137]] -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB7:[0-9]+]], i8 2, i1 false, i1 true), !dbg [[DBG137]] -// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG137]] -// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG137]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8 addrspace(1)** [[BB_ADDR]], metadata [[META122:![0-9]+]], metadata !DIExpression()), !dbg [[DBG123:![0-9]+]] +// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]] addrspace(1)*, [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8, !dbg [[DBG124:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* [[TMP0]] to [10 x [10 x [10 x i32]]]*, !dbg [[DBG124]] +// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[TMP1]], [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG124]] +// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG124]] +// CHECK1-NEXT: [[TMP3:%.*]] = load [10 x [10 x i32]] addrspace(1)*, [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], align 8, !dbg [[DBG124]] +// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast [10 x [10 x i32]] addrspace(1)* [[TMP3]] to [10 x [10 x i32]]*, !dbg [[DBG124]] +// CHECK1-NEXT: store [10 x [10 x i32]]* [[TMP4]], [10 x [10 x i32]]** [[_TMP1]], align 8, !dbg [[DBG124]] +// CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[_TMP1]], align 8, !dbg [[DBG124]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)** [[BB_ADDR]], align 8, !dbg [[DBG124]] +// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast i8 addrspace(1)* [[TMP6]] to i8*, !dbg [[DBG124]] +// CHECK1-NEXT: store i8* [[TMP7]], i8** [[_TMP2]], align 8, !dbg [[DBG124]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[_TMP2]], align 8, !dbg [[DBG124]] +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB7:[0-9]+]], i8 2, i1 false, i1 true), !dbg [[DBG124]] +// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG124]] +// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG124]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB9:[0-9]+]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG138:![0-9]+]] -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*, !dbg [[DBG138]] -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[CONV]], align 4, !dbg [[DBG138]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[A_CASTED]], align 8, !dbg [[DBG138]] -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG138]] -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast [10 x [10 x [10 x i32]]]* [[TMP2]] to i8*, !dbg [[DBG138]] -// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8, !dbg [[DBG138]] -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG138]] -// CHECK1-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP12]] to i8*, !dbg [[DBG138]] -// CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8, !dbg [[DBG138]] -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG138]] -// CHECK1-NEXT: [[TMP18:%.*]] = bitcast [10 x [10 x i32]]* [[TMP5]] to i8*, !dbg [[DBG138]] -// CHECK1-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 8, !dbg [[DBG138]] -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG138]] -// CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP19]], align 8, !dbg [[DBG138]] -// CHECK1-NEXT: [[TMP20:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**, !dbg [[DBG138]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB9]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, [10 x [10 x [10 x i32]]]*, i64, [10 x [10 x i32]]*, i8*)* @__omp_outlined__2 to i8*), i8* null, i8** [[TMP20]], i64 4), !dbg [[DBG138]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB11:[0-9]+]], i8 2, i1 true), !dbg [[DBG139:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG141:![0-9]+]] +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG125:![0-9]+]] +// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[TMP2]], [10 x [10 x [10 x i32]]]** [[TMP11]], align 8, !dbg [[DBG125]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG125]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG126:![0-9]+]] +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 8, !dbg [[DBG125]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG125]] +// CHECK1-NEXT: store [10 x [10 x i32]]* [[TMP5]], [10 x [10 x i32]]** [[TMP14]], align 8, !dbg [[DBG125]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG125]] +// CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP15]], align 8, !dbg [[DBG125]] +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG]] to i8*, !dbg [[DBG125]] +// CHECK1-NEXT: [[TMP17:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 32), !dbg [[DBG125]] +// CHECK1-NEXT: [[TMP18:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP16]], i8* [[TMP17]]), !dbg [[DBG125]] +// CHECK1-NEXT: [[TMP19:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], align 8, !dbg [[DBG125]] +// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP18]] to %struct.anon.0*, !dbg [[DBG125]] +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP19]], %struct.anon.0* [[TMP20]], align 8, !dbg [[DBG125]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB9]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__1 to i8*), i8* null, i8* [[TMP18]]), !dbg [[DBG125]] +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP17]], i64 32), !dbg [[DBG125]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB11:[0-9]+]], i8 2, i1 true), !dbg [[DBG128:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG129:![0-9]+]] // CHECK1: worker.exit: -// CHECK1-NEXT: ret void, !dbg [[DBG137]] +// CHECK1-NEXT: ret void, !dbg [[DBG124]] // // -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined___debug__1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [10 x [10 x i32]]] addrspace(1)* noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], [10 x [10 x i32]] addrspace(1)* noalias noundef [[B:%.*]], i8 addrspace(1)* noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG142:![0-9]+]] { +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] !dbg [[DBG130:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]] addrspace(1)*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x [10 x i32]] addrspace(1)*, align 8 -// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca i8 addrspace(1)*, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK1-NEXT: [[_TMP2:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[F:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[G:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[H:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META145:![0-9]+]], metadata !DIExpression()), !dbg [[DBG146:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META137:![0-9]+]], metadata !DIExpression()), !dbg [[DBG138:![0-9]+]] // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META147:![0-9]+]], metadata !DIExpression()), !dbg [[DBG146]] -// CHECK1-NEXT: store [10 x [10 x [10 x i32]]] addrspace(1)* [[C]], [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], metadata [[META148:![0-9]+]], metadata !DIExpression()), !dbg [[DBG149:![0-9]+]] -// CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META150:![0-9]+]], metadata !DIExpression()), !dbg [[DBG151:![0-9]+]] -// CHECK1-NEXT: store [10 x [10 x i32]] addrspace(1)* [[B]], [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], metadata [[META152:![0-9]+]], metadata !DIExpression()), !dbg [[DBG153:![0-9]+]] -// CHECK1-NEXT: store i8 addrspace(1)* [[BB]], i8 addrspace(1)** [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8 addrspace(1)** [[BB_ADDR]], metadata [[META154:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]] addrspace(1)*, [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8, !dbg [[DBG156:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* [[TMP0]] to [10 x [10 x [10 x i32]]]*, !dbg [[DBG156]] -// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[TMP1]], [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG156]] -// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG156]] -// CHECK1-NEXT: [[TMP3:%.*]] = load [10 x [10 x i32]] addrspace(1)*, [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], align 8, !dbg [[DBG156]] -// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast [10 x [10 x i32]] addrspace(1)* [[TMP3]] to [10 x [10 x i32]]*, !dbg [[DBG156]] -// CHECK1-NEXT: store [10 x [10 x i32]]* [[TMP4]], [10 x [10 x i32]]** [[_TMP1]], align 8, !dbg [[DBG156]] -// CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[_TMP1]], align 8, !dbg [[DBG156]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)** [[BB_ADDR]], align 8, !dbg [[DBG156]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast i8 addrspace(1)* [[TMP6]] to i8*, !dbg [[DBG156]] -// CHECK1-NEXT: store i8* [[TMP7]], i8** [[_TMP2]], align 8, !dbg [[DBG156]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[_TMP2]], align 8, !dbg [[DBG156]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[F]], metadata [[META157:![0-9]+]], metadata !DIExpression()), !dbg [[DBG159:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 1, !dbg [[DBG160:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG160]] -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX3]], i64 0, i64 1, !dbg [[DBG160]] -// CHECK1-NEXT: store i32* [[ARRAYIDX4]], i32** [[F]], align 8, !dbg [[DBG159]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[G]], metadata [[META161:![0-9]+]], metadata !DIExpression()), !dbg [[DBG162:![0-9]+]] -// CHECK1-NEXT: store i32* [[A_ADDR]], i32** [[G]], align 8, !dbg [[DBG162]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[H]], metadata [[META163:![0-9]+]], metadata !DIExpression()), !dbg [[DBG164:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP5]], i64 0, i64 1, !dbg [[DBG165:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX5]], i64 0, i64 1, !dbg [[DBG165]] -// CHECK1-NEXT: store i32* [[ARRAYIDX6]], i32** [[H]], align 8, !dbg [[DBG164]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[D]], metadata [[META166:![0-9]+]], metadata !DIExpression()), !dbg [[DBG167:![0-9]+]] -// CHECK1-NEXT: store i32 15, i32* [[D]], align 4, !dbg [[DBG167]] -// CHECK1-NEXT: store i32 5, i32* [[A_ADDR]], align 4, !dbg [[DBG168:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP5]], i64 0, i64 0, !dbg [[DBG169:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG170:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64, !dbg [[DBG169]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX7]], i64 0, i64 [[IDXPROM]], !dbg [[DBG169]] -// CHECK1-NEXT: store i32 10, i32* [[ARRAYIDX8]], align 4, !dbg [[DBG171:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 0, !dbg [[DBG172:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX9]], i64 0, i64 0, !dbg [[DBG172]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG173:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP10]] to i64, !dbg [[DBG172]] -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX10]], i64 0, i64 [[IDXPROM11]], !dbg [[DBG172]] -// CHECK1-NEXT: store i32 11, i32* [[ARRAYIDX12]], align 4, !dbg [[DBG174:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 0, !dbg [[DBG175:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG175]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG176:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP11]] to i64, !dbg [[DBG175]] -// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG175]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX16]], align 4, !dbg [[DBG175]] -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP5]], i64 0, i64 0, !dbg [[DBG177:![0-9]+]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG178:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM18:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG177]] -// CHECK1-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX17]], i64 0, i64 [[IDXPROM18]], !dbg [[DBG177]] -// CHECK1-NEXT: store i32 [[TMP12]], i32* [[ARRAYIDX19]], align 4, !dbg [[DBG179:![0-9]+]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i8, i8* [[TMP8]], align 1, !dbg [[DBG180:![0-9]+]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP14]] to i1, !dbg [[DBG180]] -// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG180]] -// CHECK1-NEXT: store i32 [[CONV]], i32* [[D]], align 4, !dbg [[DBG181:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG182:![0-9]+]] -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [10 x [10 x i32]]]* noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[B:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG183:![0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META184:![0-9]+]], metadata !DIExpression()), !dbg [[DBG185:![0-9]+]] -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META186:![0-9]+]], metadata !DIExpression()), !dbg [[DBG185]] -// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[C]], [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]]** [[C_ADDR]], metadata [[META187:![0-9]+]], metadata !DIExpression()), !dbg [[DBG185]] -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i64* [[A_ADDR]], metadata [[META188:![0-9]+]], metadata !DIExpression()), !dbg [[DBG185]] -// CHECK1-NEXT: store [10 x [10 x i32]]* [[B]], [10 x [10 x i32]]** [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]]** [[B_ADDR]], metadata [[META189:![0-9]+]], metadata !DIExpression()), !dbg [[DBG185]] -// CHECK1-NEXT: store i8* [[BB]], i8** [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8** [[BB_ADDR]], metadata [[META190:![0-9]+]], metadata !DIExpression()), !dbg [[DBG185]] -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG191:![0-9]+]] -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*, !dbg [[DBG191]] -// CHECK1-NEXT: [[TMP1:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG191]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG191]] -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG191]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG191]] -// CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG191]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 4, !dbg [[DBG191]] -// CHECK1-NEXT: [[TMP7:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG191]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG191]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP5]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG191]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast [10 x [10 x i32]]* [[TMP7]] to [10 x [10 x i32]] addrspace(1)*, !dbg [[DBG191]] -// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast i8* [[TMP8]] to i8 addrspace(1)*, !dbg [[DBG191]] -// CHECK1-NEXT: call void @__omp_outlined___debug__1(i32* [[TMP3]], i32* [[TMP4]], [10 x [10 x [10 x i32]]] addrspace(1)* [[TMP9]], i32 [[TMP6]], [10 x [10 x i32]] addrspace(1)* [[TMP10]], i8 addrspace(1)* [[TMP11]]) #[[ATTR3]], !dbg [[DBG191]] -// CHECK1-NEXT: ret void, !dbg [[DBG191]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META139:![0-9]+]], metadata !DIExpression()), !dbg [[DBG138]] +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata %struct.anon.0** [[__CONTEXT_ADDR]], metadata [[META140:![0-9]+]], metadata !DIExpression()), !dbg [[DBG138]] +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8, !dbg [[DBG141:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0, !dbg [[DBG141]] +// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[TMP1]], align 8, !dbg [[DBG141]] +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1, !dbg [[DBG141]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8, !dbg [[DBG141]] +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[A]], align 4, !dbg [[DBG141]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2, !dbg [[DBG141]] +// CHECK1-NEXT: [[TMP6:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[TMP5]], align 8, !dbg [[DBG141]] +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3, !dbg [[DBG141]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[TMP7]], align 8, !dbg [[DBG141]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[F]], metadata [[META142:![0-9]+]], metadata !DIExpression()), !dbg [[DBG144:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 1, !dbg [[DBG145:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG145]] +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX1]], i64 0, i64 1, !dbg [[DBG145]] +// CHECK1-NEXT: store i32* [[ARRAYIDX2]], i32** [[F]], align 8, !dbg [[DBG144]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[G]], metadata [[META146:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147:![0-9]+]] +// CHECK1-NEXT: store i32* [[A]], i32** [[G]], align 8, !dbg [[DBG147]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[H]], metadata [[META148:![0-9]+]], metadata !DIExpression()), !dbg [[DBG149:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP6]], i64 0, i64 1, !dbg [[DBG150:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX3]], i64 0, i64 1, !dbg [[DBG150]] +// CHECK1-NEXT: store i32* [[ARRAYIDX4]], i32** [[H]], align 8, !dbg [[DBG149]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[D]], metadata [[META151:![0-9]+]], metadata !DIExpression()), !dbg [[DBG152:![0-9]+]] +// CHECK1-NEXT: store i32 15, i32* [[D]], align 4, !dbg [[DBG152]] +// CHECK1-NEXT: store i32 5, i32* [[A]], align 4, !dbg [[DBG153:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP6]], i64 0, i64 0, !dbg [[DBG154:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG155:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64, !dbg [[DBG154]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX5]], i64 0, i64 [[IDXPROM]], !dbg [[DBG154]] +// CHECK1-NEXT: store i32 10, i32* [[ARRAYIDX6]], align 4, !dbg [[DBG156:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 0, !dbg [[DBG157:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX7]], i64 0, i64 0, !dbg [[DBG157]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG158:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP10]] to i64, !dbg [[DBG157]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX8]], i64 0, i64 [[IDXPROM9]], !dbg [[DBG157]] +// CHECK1-NEXT: store i32 11, i32* [[ARRAYIDX10]], align 4, !dbg [[DBG159:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 0, !dbg [[DBG160:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX11]], i64 0, i64 0, !dbg [[DBG160]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG161:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP11]] to i64, !dbg [[DBG160]] +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX12]], i64 0, i64 [[IDXPROM13]], !dbg [[DBG160]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX14]], align 4, !dbg [[DBG160]] +// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP6]], i64 0, i64 0, !dbg [[DBG162:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG163:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG162]] +// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX15]], i64 0, i64 [[IDXPROM16]], !dbg [[DBG162]] +// CHECK1-NEXT: store i32 [[TMP12]], i32* [[ARRAYIDX17]], align 4, !dbg [[DBG164:![0-9]+]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i8, i8* [[TMP8]], align 1, !dbg [[DBG165:![0-9]+]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP14]] to i1, !dbg [[DBG165]] +// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG165]] +// CHECK1-NEXT: store i32 [[CONV]], i32* [[D]], align 4, !dbg [[DBG166:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG167:![0-9]+]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37 -// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[B:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5]] !dbg [[DBG192:![0-9]+]] { +// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[B:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR7]] !dbg [[DBG168:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 // CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[C]], [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]]** [[C_ADDR]], metadata [[META193:![0-9]+]], metadata !DIExpression()), !dbg [[DBG194:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]]** [[C_ADDR]], metadata [[META169:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170:![0-9]+]] // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i64* [[A_ADDR]], metadata [[META195:![0-9]+]], metadata !DIExpression()), !dbg [[DBG194]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i64* [[A_ADDR]], metadata [[META171:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170]] // CHECK1-NEXT: store [10 x [10 x i32]]* [[B]], [10 x [10 x i32]]** [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]]** [[B_ADDR]], metadata [[META196:![0-9]+]], metadata !DIExpression()), !dbg [[DBG194]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]]** [[B_ADDR]], metadata [[META172:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170]] // CHECK1-NEXT: store i8* [[BB]], i8** [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8** [[BB_ADDR]], metadata [[META197:![0-9]+]], metadata !DIExpression()), !dbg [[DBG194]] -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG198:![0-9]+]] -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*, !dbg [[DBG198]] -// CHECK1-NEXT: [[TMP1:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG198]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG198]] -// CHECK1-NEXT: [[TMP3:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG198]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4, !dbg [[DBG198]] -// CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG198]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG198]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP3]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG198]] -// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast [10 x [10 x i32]]* [[TMP5]] to [10 x [10 x i32]] addrspace(1)*, !dbg [[DBG198]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast i8* [[TMP6]] to i8 addrspace(1)*, !dbg [[DBG198]] -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug__([10 x [10 x [10 x i32]]] addrspace(1)* [[TMP7]], i32 [[TMP4]], [10 x [10 x i32]] addrspace(1)* [[TMP8]], i8 addrspace(1)* [[TMP9]]) #[[ATTR3]], !dbg [[DBG198]] -// CHECK1-NEXT: ret void, !dbg [[DBG198]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8** [[BB_ADDR]], metadata [[META173:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170]] +// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG174:![0-9]+]] +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*, !dbg [[DBG174]] +// CHECK1-NEXT: [[TMP1:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG174]] +// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG174]] +// CHECK1-NEXT: [[TMP3:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG174]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4, !dbg [[DBG174]] +// CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG174]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG174]] +// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP3]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG174]] +// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast [10 x [10 x i32]]* [[TMP5]] to [10 x [10 x i32]] addrspace(1)*, !dbg [[DBG174]] +// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast i8* [[TMP6]] to i8 addrspace(1)*, !dbg [[DBG174]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug__([10 x [10 x [10 x i32]]] addrspace(1)* [[TMP7]], i32 [[TMP4]], [10 x [10 x i32]] addrspace(1)* [[TMP8]], i8 addrspace(1)* [[TMP9]]) #[[ATTR4]], !dbg [[DBG174]] +// CHECK1-NEXT: ret void, !dbg [[DBG174]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug__ -// CHECK1-SAME: ([10 x [10 x [10 x i32]]] addrspace(1)* noalias noundef [[C:%.*]], i32 addrspace(1)* noalias noundef [[A:%.*]], [10 x [10 x i32]] addrspace(1)* noalias noundef [[B:%.*]], i8 addrspace(1)* noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG199:![0-9]+]] { +// CHECK1-SAME: ([10 x [10 x [10 x i32]]] addrspace(1)* noalias noundef [[C:%.*]], i32 addrspace(1)* noalias noundef [[A:%.*]], [10 x [10 x i32]] addrspace(1)* noalias noundef [[B:%.*]], i8 addrspace(1)* noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG175:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]] addrspace(1)*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32 addrspace(1)*, align 8 @@ -506,213 +406,158 @@ // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca [10 x [10 x i32]]*, align 8 // CHECK1-NEXT: [[_TMP3:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_1]], align 8 // CHECK1-NEXT: store [10 x [10 x [10 x i32]]] addrspace(1)* [[C]], [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], metadata [[META204:![0-9]+]], metadata !DIExpression()), !dbg [[DBG205:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], metadata [[META180:![0-9]+]], metadata !DIExpression()), !dbg [[DBG181:![0-9]+]] // CHECK1-NEXT: store i32 addrspace(1)* [[A]], i32 addrspace(1)** [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32 addrspace(1)** [[A_ADDR]], metadata [[META206:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32 addrspace(1)** [[A_ADDR]], metadata [[META182:![0-9]+]], metadata !DIExpression()), !dbg [[DBG183:![0-9]+]] // CHECK1-NEXT: store [10 x [10 x i32]] addrspace(1)* [[B]], [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], metadata [[META208:![0-9]+]], metadata !DIExpression()), !dbg [[DBG209:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], metadata [[META184:![0-9]+]], metadata !DIExpression()), !dbg [[DBG185:![0-9]+]] // CHECK1-NEXT: store i8 addrspace(1)* [[BB]], i8 addrspace(1)** [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8 addrspace(1)** [[BB_ADDR]], metadata [[META210:![0-9]+]], metadata !DIExpression()), !dbg [[DBG211:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]] addrspace(1)*, [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8, !dbg [[DBG212:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* [[TMP0]] to [10 x [10 x [10 x i32]]]*, !dbg [[DBG212]] -// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[TMP1]], [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP3:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)** [[A_ADDR]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast i32 addrspace(1)* [[TMP3]] to i32*, !dbg [[DBG212]] -// CHECK1-NEXT: store i32* [[TMP4]], i32** [[_TMP1]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[_TMP1]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP6:%.*]] = load [10 x [10 x i32]] addrspace(1)*, [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast [10 x [10 x i32]] addrspace(1)* [[TMP6]] to [10 x [10 x i32]]*, !dbg [[DBG212]] -// CHECK1-NEXT: store [10 x [10 x i32]]* [[TMP7]], [10 x [10 x i32]]** [[_TMP2]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP8:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[_TMP2]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)** [[BB_ADDR]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast i8 addrspace(1)* [[TMP9]] to i8*, !dbg [[DBG212]] -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[_TMP3]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i8*, i8** [[_TMP3]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB13:[0-9]+]], i8 2, i1 false, i1 true), !dbg [[DBG212]] -// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG212]] -// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG212]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8 addrspace(1)** [[BB_ADDR]], metadata [[META186:![0-9]+]], metadata !DIExpression()), !dbg [[DBG187:![0-9]+]] +// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]] addrspace(1)*, [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8, !dbg [[DBG188:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* [[TMP0]] to [10 x [10 x [10 x i32]]]*, !dbg [[DBG188]] +// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[TMP1]], [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG188]] +// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG188]] +// CHECK1-NEXT: [[TMP3:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)** [[A_ADDR]], align 8, !dbg [[DBG188]] +// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast i32 addrspace(1)* [[TMP3]] to i32*, !dbg [[DBG188]] +// CHECK1-NEXT: store i32* [[TMP4]], i32** [[_TMP1]], align 8, !dbg [[DBG188]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[_TMP1]], align 8, !dbg [[DBG188]] +// CHECK1-NEXT: [[TMP6:%.*]] = load [10 x [10 x i32]] addrspace(1)*, [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], align 8, !dbg [[DBG188]] +// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast [10 x [10 x i32]] addrspace(1)* [[TMP6]] to [10 x [10 x i32]]*, !dbg [[DBG188]] +// CHECK1-NEXT: store [10 x [10 x i32]]* [[TMP7]], [10 x [10 x i32]]** [[_TMP2]], align 8, !dbg [[DBG188]] +// CHECK1-NEXT: [[TMP8:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[_TMP2]], align 8, !dbg [[DBG188]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)** [[BB_ADDR]], align 8, !dbg [[DBG188]] +// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast i8 addrspace(1)* [[TMP9]] to i8*, !dbg [[DBG188]] +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[_TMP3]], align 8, !dbg [[DBG188]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i8*, i8** [[_TMP3]], align 8, !dbg [[DBG188]] +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB13:[0-9]+]], i8 2, i1 false, i1 true), !dbg [[DBG188]] +// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG188]] +// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG188]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB15:[0-9]+]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG213:![0-9]+]] -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast [10 x [10 x [10 x i32]]]* [[TMP2]] to i8*, !dbg [[DBG213]] -// CHECK1-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP5]] to i8*, !dbg [[DBG213]] -// CHECK1-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP19:%.*]] = bitcast [10 x [10 x i32]]* [[TMP8]] to i8*, !dbg [[DBG213]] -// CHECK1-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG213]] -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[TMP20]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP21:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**, !dbg [[DBG213]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB15]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, [10 x [10 x [10 x i32]]]*, i32*, [10 x [10 x i32]]*, i8*)* @__omp_outlined__4 to i8*), i8* null, i8** [[TMP21]], i64 4), !dbg [[DBG213]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB17:[0-9]+]], i8 2, i1 true), !dbg [[DBG214:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG216:![0-9]+]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG189:![0-9]+]] +// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[TMP2]], [10 x [10 x [10 x i32]]]** [[TMP14]], align 8, !dbg [[DBG189]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG189]] +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[TMP15]], align 8, !dbg [[DBG189]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG189]] +// CHECK1-NEXT: store [10 x [10 x i32]]* [[TMP8]], [10 x [10 x i32]]** [[TMP16]], align 8, !dbg [[DBG189]] +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG189]] +// CHECK1-NEXT: store i8* [[TMP11]], i8** [[TMP17]], align 8, !dbg [[DBG189]] +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast %struct.anon.1* [[DOTTMP_OUTLINED_AGG_ARG]] to i8*, !dbg [[DBG189]] +// CHECK1-NEXT: [[TMP19:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 32), !dbg [[DBG189]] +// CHECK1-NEXT: [[TMP20:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP18]], i8* [[TMP19]]), !dbg [[DBG189]] +// CHECK1-NEXT: [[TMP21:%.*]] = load [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], align 8, !dbg [[DBG189]] +// CHECK1-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP20]] to %struct.anon.1*, !dbg [[DBG189]] +// CHECK1-NEXT: store [[STRUCT_ANON_1]] [[TMP21]], %struct.anon.1* [[TMP22]], align 8, !dbg [[DBG189]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB15]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.1*)* @__omp_outlined__2 to i8*), i8* null, i8* [[TMP20]]), !dbg [[DBG189]] +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP19]], i64 32), !dbg [[DBG189]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB17:[0-9]+]], i8 2, i1 true), !dbg [[DBG190:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG192:![0-9]+]] // CHECK1: worker.exit: -// CHECK1-NEXT: ret void, !dbg [[DBG212]] +// CHECK1-NEXT: ret void, !dbg [[DBG188]] // // -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined___debug__3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [10 x [10 x i32]]] addrspace(1)* noalias noundef [[C:%.*]], i32 addrspace(1)* noalias noundef [[A:%.*]], [10 x [10 x i32]] addrspace(1)* noalias noundef [[B:%.*]], i8 addrspace(1)* noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG217:![0-9]+]] { +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] !dbg [[DBG193:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]] addrspace(1)*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32 addrspace(1)*, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x [10 x i32]] addrspace(1)*, align 8 -// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca i8 addrspace(1)*, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[_TMP2:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK1-NEXT: [[_TMP3:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[F:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[G:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[H:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META220:![0-9]+]], metadata !DIExpression()), !dbg [[DBG221:![0-9]+]] -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META222:![0-9]+]], metadata !DIExpression()), !dbg [[DBG221]] -// CHECK1-NEXT: store [10 x [10 x [10 x i32]]] addrspace(1)* [[C]], [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], metadata [[META223:![0-9]+]], metadata !DIExpression()), !dbg [[DBG224:![0-9]+]] -// CHECK1-NEXT: store i32 addrspace(1)* [[A]], i32 addrspace(1)** [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32 addrspace(1)** [[A_ADDR]], metadata [[META225:![0-9]+]], metadata !DIExpression()), !dbg [[DBG226:![0-9]+]] -// CHECK1-NEXT: store [10 x [10 x i32]] addrspace(1)* [[B]], [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], metadata [[META227:![0-9]+]], metadata !DIExpression()), !dbg [[DBG228:![0-9]+]] -// CHECK1-NEXT: store i8 addrspace(1)* [[BB]], i8 addrspace(1)** [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8 addrspace(1)** [[BB_ADDR]], metadata [[META229:![0-9]+]], metadata !DIExpression()), !dbg [[DBG230:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]] addrspace(1)*, [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8, !dbg [[DBG231:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* [[TMP0]] to [10 x [10 x [10 x i32]]]*, !dbg [[DBG231]] -// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[TMP1]], [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG231]] -// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG231]] -// CHECK1-NEXT: [[TMP3:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)** [[A_ADDR]], align 8, !dbg [[DBG231]] -// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast i32 addrspace(1)* [[TMP3]] to i32*, !dbg [[DBG231]] -// CHECK1-NEXT: store i32* [[TMP4]], i32** [[_TMP1]], align 8, !dbg [[DBG231]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[_TMP1]], align 8, !dbg [[DBG231]] -// CHECK1-NEXT: [[TMP6:%.*]] = load [10 x [10 x i32]] addrspace(1)*, [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], align 8, !dbg [[DBG231]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast [10 x [10 x i32]] addrspace(1)* [[TMP6]] to [10 x [10 x i32]]*, !dbg [[DBG231]] -// CHECK1-NEXT: store [10 x [10 x i32]]* [[TMP7]], [10 x [10 x i32]]** [[_TMP2]], align 8, !dbg [[DBG231]] -// CHECK1-NEXT: [[TMP8:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[_TMP2]], align 8, !dbg [[DBG231]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)** [[BB_ADDR]], align 8, !dbg [[DBG231]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast i8 addrspace(1)* [[TMP9]] to i8*, !dbg [[DBG231]] -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[_TMP3]], align 8, !dbg [[DBG231]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i8*, i8** [[_TMP3]], align 8, !dbg [[DBG231]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[F]], metadata [[META232:![0-9]+]], metadata !DIExpression()), !dbg [[DBG234:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 1, !dbg [[DBG235:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG235]] -// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX4]], i64 0, i64 1, !dbg [[DBG235]] -// CHECK1-NEXT: store i32* [[ARRAYIDX5]], i32** [[F]], align 8, !dbg [[DBG234]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[G]], metadata [[META236:![0-9]+]], metadata !DIExpression()), !dbg [[DBG237:![0-9]+]] -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[G]], align 8, !dbg [[DBG237]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[H]], metadata [[META238:![0-9]+]], metadata !DIExpression()), !dbg [[DBG239:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP8]], i64 0, i64 1, !dbg [[DBG240:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX6]], i64 0, i64 1, !dbg [[DBG240]] -// CHECK1-NEXT: store i32* [[ARRAYIDX7]], i32** [[H]], align 8, !dbg [[DBG239]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[D]], metadata [[META241:![0-9]+]], metadata !DIExpression()), !dbg [[DBG242:![0-9]+]] -// CHECK1-NEXT: store i32 15, i32* [[D]], align 4, !dbg [[DBG242]] -// CHECK1-NEXT: store i32 5, i32* [[TMP5]], align 4, !dbg [[DBG243:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP8]], i64 0, i64 0, !dbg [[DBG244:![0-9]+]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP5]], align 4, !dbg [[DBG245:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64, !dbg [[DBG244]] -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX8]], i64 0, i64 [[IDXPROM]], !dbg [[DBG244]] -// CHECK1-NEXT: store i32 10, i32* [[ARRAYIDX9]], align 4, !dbg [[DBG246:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 0, !dbg [[DBG247:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX10]], i64 0, i64 0, !dbg [[DBG247]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP5]], align 4, !dbg [[DBG248:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG247]] -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX11]], i64 0, i64 [[IDXPROM12]], !dbg [[DBG247]] -// CHECK1-NEXT: store i32 11, i32* [[ARRAYIDX13]], align 4, !dbg [[DBG249:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 0, !dbg [[DBG250:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX14]], i64 0, i64 0, !dbg [[DBG250]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP5]], align 4, !dbg [[DBG251:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP14]] to i64, !dbg [[DBG250]] -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX15]], i64 0, i64 [[IDXPROM16]], !dbg [[DBG250]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX17]], align 4, !dbg [[DBG250]] -// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP8]], i64 0, i64 0, !dbg [[DBG252:![0-9]+]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP5]], align 4, !dbg [[DBG253:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP16]] to i64, !dbg [[DBG252]] -// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG252]] -// CHECK1-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX20]], align 4, !dbg [[DBG254:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP8]], i64 0, i64 0, !dbg [[DBG255:![0-9]+]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP5]], align 4, !dbg [[DBG256:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP17]] to i64, !dbg [[DBG255]] -// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX23]], align 4, !dbg [[DBG255]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP18]], 0, !dbg [[DBG255]] -// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8, !dbg [[DBG257:![0-9]+]] -// CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[TMP11]], align 1, !dbg [[DBG257]] -// CHECK1-NEXT: ret void, !dbg [[DBG258:![0-9]+]] -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [10 x [10 x i32]]]* noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[B:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG259:![0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META262:![0-9]+]], metadata !DIExpression()), !dbg [[DBG263:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META200:![0-9]+]], metadata !DIExpression()), !dbg [[DBG201:![0-9]+]] // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META264:![0-9]+]], metadata !DIExpression()), !dbg [[DBG263]] -// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[C]], [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]]** [[C_ADDR]], metadata [[META265:![0-9]+]], metadata !DIExpression()), !dbg [[DBG263]] -// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META266:![0-9]+]], metadata !DIExpression()), !dbg [[DBG263]] -// CHECK1-NEXT: store [10 x [10 x i32]]* [[B]], [10 x [10 x i32]]** [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]]** [[B_ADDR]], metadata [[META267:![0-9]+]], metadata !DIExpression()), !dbg [[DBG263]] -// CHECK1-NEXT: store i8* [[BB]], i8** [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8** [[BB_ADDR]], metadata [[META268:![0-9]+]], metadata !DIExpression()), !dbg [[DBG263]] -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG269:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP6:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP8:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP6]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast i32* [[TMP7]] to i32 addrspace(1)*, !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP12:%.*]] = addrspacecast [10 x [10 x i32]]* [[TMP8]] to [10 x [10 x i32]] addrspace(1)*, !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP13:%.*]] = addrspacecast i8* [[TMP9]] to i8 addrspace(1)*, !dbg [[DBG269]] -// CHECK1-NEXT: call void @__omp_outlined___debug__3(i32* [[TMP4]], i32* [[TMP5]], [10 x [10 x [10 x i32]]] addrspace(1)* [[TMP10]], i32 addrspace(1)* [[TMP11]], [10 x [10 x i32]] addrspace(1)* [[TMP12]], i8 addrspace(1)* [[TMP13]]) #[[ATTR3]], !dbg [[DBG269]] -// CHECK1-NEXT: ret void, !dbg [[DBG269]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META202:![0-9]+]], metadata !DIExpression()), !dbg [[DBG201]] +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata %struct.anon.1** [[__CONTEXT_ADDR]], metadata [[META203:![0-9]+]], metadata !DIExpression()), !dbg [[DBG201]] +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8, !dbg [[DBG204:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0, !dbg [[DBG204]] +// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[TMP1]], align 8, !dbg [[DBG204]] +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1, !dbg [[DBG204]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !dbg [[DBG204]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2, !dbg [[DBG204]] +// CHECK1-NEXT: [[TMP6:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[TMP5]], align 8, !dbg [[DBG204]] +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3, !dbg [[DBG204]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[TMP7]], align 8, !dbg [[DBG204]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[F]], metadata [[META205:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 1, !dbg [[DBG208:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG208]] +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX1]], i64 0, i64 1, !dbg [[DBG208]] +// CHECK1-NEXT: store i32* [[ARRAYIDX2]], i32** [[F]], align 8, !dbg [[DBG207]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[G]], metadata [[META209:![0-9]+]], metadata !DIExpression()), !dbg [[DBG210:![0-9]+]] +// CHECK1-NEXT: store i32* [[TMP4]], i32** [[G]], align 8, !dbg [[DBG210]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[H]], metadata [[META211:![0-9]+]], metadata !DIExpression()), !dbg [[DBG212:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP6]], i64 0, i64 1, !dbg [[DBG213:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX3]], i64 0, i64 1, !dbg [[DBG213]] +// CHECK1-NEXT: store i32* [[ARRAYIDX4]], i32** [[H]], align 8, !dbg [[DBG212]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[D]], metadata [[META214:![0-9]+]], metadata !DIExpression()), !dbg [[DBG215:![0-9]+]] +// CHECK1-NEXT: store i32 15, i32* [[D]], align 4, !dbg [[DBG215]] +// CHECK1-NEXT: store i32 5, i32* [[TMP4]], align 4, !dbg [[DBG216:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP6]], i64 0, i64 0, !dbg [[DBG217:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP4]], align 4, !dbg [[DBG218:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64, !dbg [[DBG217]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX5]], i64 0, i64 [[IDXPROM]], !dbg [[DBG217]] +// CHECK1-NEXT: store i32 10, i32* [[ARRAYIDX6]], align 4, !dbg [[DBG219:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 0, !dbg [[DBG220:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX7]], i64 0, i64 0, !dbg [[DBG220]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP4]], align 4, !dbg [[DBG221:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP10]] to i64, !dbg [[DBG220]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX8]], i64 0, i64 [[IDXPROM9]], !dbg [[DBG220]] +// CHECK1-NEXT: store i32 11, i32* [[ARRAYIDX10]], align 4, !dbg [[DBG222:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 0, !dbg [[DBG223:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX11]], i64 0, i64 0, !dbg [[DBG223]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP4]], align 4, !dbg [[DBG224:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP11]] to i64, !dbg [[DBG223]] +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX12]], i64 0, i64 [[IDXPROM13]], !dbg [[DBG223]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX14]], align 4, !dbg [[DBG223]] +// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP6]], i64 0, i64 0, !dbg [[DBG225:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP4]], align 4, !dbg [[DBG226:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG225]] +// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX15]], i64 0, i64 [[IDXPROM16]], !dbg [[DBG225]] +// CHECK1-NEXT: store i32 [[TMP12]], i32* [[ARRAYIDX17]], align 4, !dbg [[DBG227:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP6]], i64 0, i64 0, !dbg [[DBG228:![0-9]+]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP4]], align 4, !dbg [[DBG229:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP14]] to i64, !dbg [[DBG228]] +// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG228]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX20]], align 4, !dbg [[DBG228]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0, !dbg [[DBG228]] +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8, !dbg [[DBG230:![0-9]+]] +// CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[TMP8]], align 1, !dbg [[DBG230]] +// CHECK1-NEXT: ret void, !dbg [[DBG231:![0-9]+]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51 -// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[B:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5]] !dbg [[DBG270:![0-9]+]] { +// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[B:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR7]] !dbg [[DBG232:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 // CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[C]], [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]]** [[C_ADDR]], metadata [[META273:![0-9]+]], metadata !DIExpression()), !dbg [[DBG274:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]]** [[C_ADDR]], metadata [[META235:![0-9]+]], metadata !DIExpression()), !dbg [[DBG236:![0-9]+]] // CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META275:![0-9]+]], metadata !DIExpression()), !dbg [[DBG274]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META237:![0-9]+]], metadata !DIExpression()), !dbg [[DBG236]] // CHECK1-NEXT: store [10 x [10 x i32]]* [[B]], [10 x [10 x i32]]** [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]]** [[B_ADDR]], metadata [[META276:![0-9]+]], metadata !DIExpression()), !dbg [[DBG274]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]]** [[B_ADDR]], metadata [[META238:![0-9]+]], metadata !DIExpression()), !dbg [[DBG236]] // CHECK1-NEXT: store i8* [[BB]], i8** [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8** [[BB_ADDR]], metadata [[META277:![0-9]+]], metadata !DIExpression()), !dbg [[DBG274]] -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG278:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG278]] -// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG278]] -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG278]] -// CHECK1-NEXT: [[TMP4:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG278]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG278]] -// CHECK1-NEXT: [[TMP6:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG278]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG278]] -// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP4]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG278]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast i32* [[TMP5]] to i32 addrspace(1)*, !dbg [[DBG278]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast [10 x [10 x i32]]* [[TMP6]] to [10 x [10 x i32]] addrspace(1)*, !dbg [[DBG278]] -// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast i8* [[TMP7]] to i8 addrspace(1)*, !dbg [[DBG278]] -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug__([10 x [10 x [10 x i32]]] addrspace(1)* [[TMP8]], i32 addrspace(1)* [[TMP9]], [10 x [10 x i32]] addrspace(1)* [[TMP10]], i8 addrspace(1)* [[TMP11]]) #[[ATTR3]], !dbg [[DBG278]] -// CHECK1-NEXT: ret void, !dbg [[DBG278]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8** [[BB_ADDR]], metadata [[META239:![0-9]+]], metadata !DIExpression()), !dbg [[DBG236]] +// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG240:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG240]] +// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG240]] +// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG240]] +// CHECK1-NEXT: [[TMP4:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG240]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG240]] +// CHECK1-NEXT: [[TMP6:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG240]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG240]] +// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP4]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG240]] +// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast i32* [[TMP5]] to i32 addrspace(1)*, !dbg [[DBG240]] +// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast [10 x [10 x i32]]* [[TMP6]] to [10 x [10 x i32]] addrspace(1)*, !dbg [[DBG240]] +// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast i8* [[TMP7]] to i8 addrspace(1)*, !dbg [[DBG240]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug__([10 x [10 x [10 x i32]]] addrspace(1)* [[TMP8]], i32 addrspace(1)* [[TMP9]], [10 x [10 x i32]] addrspace(1)* [[TMP10]], i8 addrspace(1)* [[TMP11]]) #[[ATTR4]], !dbg [[DBG240]] +// CHECK1-NEXT: ret void, !dbg [[DBG240]] // diff --git a/clang/test/OpenMP/target_parallel_for_codegen.cpp b/clang/test/OpenMP/target_parallel_for_codegen.cpp --- a/clang/test/OpenMP/target_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_codegen.cpp @@ -344,7 +344,7 @@ // CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK1-NEXT: [[A_CASTED6:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_CASTED8:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [2 x i8*], align 8 @@ -428,13 +428,13 @@ // CHECK1-NEXT: store i8* null, i8** [[TMP33]], align 8 // CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 0 // CHECK1-NEXT: [[TMP37:%.*]] = load i16, i16* [[AA]], align 2 // CHECK1-NEXT: store i16 [[TMP37]], i16* [[TMP36]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 1 // CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[LIN]], align 4 // CHECK1-NEXT: store i32 [[TMP39]], i32* [[TMP38]], align 4 -// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 2 // CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[A]], align 4 // CHECK1-NEXT: store i32 [[TMP41]], i32* [[TMP40]], align 4 // CHECK1-NEXT: [[TMP42:%.*]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i64 120, i64 12, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*), i64 -1) @@ -442,10 +442,10 @@ // CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP43]], i32 0, i32 0 // CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP44]], i32 0, i32 0 // CHECK1-NEXT: [[TMP46:%.*]] = load i8*, i8** [[TMP45]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* +// CHECK1-NEXT: [[TMP47:%.*]] = bitcast %struct.anon.2* [[AGG_CAPTURED]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP46]], i8* align 4 [[TMP47]], i64 12, i1 false) // CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP43]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP49:%.*]] = bitcast i8* [[TMP46]] to %struct.anon* +// CHECK1-NEXT: [[TMP49:%.*]] = bitcast i8* [[TMP46]] to %struct.anon.2* // CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP48]], i32 0, i32 0 // CHECK1-NEXT: [[TMP51:%.*]] = bitcast [3 x i8*]* [[TMP50]] to i8* // CHECK1-NEXT: [[TMP52:%.*]] = bitcast i8** [[TMP34]] to i8* @@ -630,15 +630,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103 // CHECK1-SAME: () #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -648,46 +650,48 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 5, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK1-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK1-NEXT: br i1 [[TMP9]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK1-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: // CHECK1-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK1: .cancel.continue: -// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] // CHECK1: .cancel.exit2: // CHECK1-NEXT: br label [[CANCEL_EXIT]] // CHECK1: .cancel.continue3: @@ -695,19 +699,19 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK1: cancel.cont: // CHECK1-NEXT: ret void // CHECK1: cancel.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[CANCEL_CONT]] // // @@ -716,29 +720,28 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[K_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[K_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[K]], i64* [[K_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[K_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[TMP2]], i64* [[K_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[K_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[K_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[TMP3]], i64* [[TMP2]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[K:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[K_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTLINEAR_START:%.*]] = alloca i64, align 8 @@ -750,66 +753,71 @@ // CHECK1-NEXT: [[K1:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[K]], i64* [[K_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[K_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[TMP0]], i64* [[DOTLINEAR_START]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[K]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[K]], align 8 +// CHECK1-NEXT: store i64 [[TMP5]], i64* [[DOTLINEAR_START]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 8, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP2]]) -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 35, i32 0, i32 8, i32 1, i32 1) +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP7]]) +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32 35, i32 0, i32 8, i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] // CHECK1-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP9]], 3 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i32 [[MUL2]] to i64 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV3]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP14]], 3 +// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] to i64 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP13]], [[CONV]] // CHECK1-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[A]], align 4, !llvm.access.group !12 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[K1]], align 8 -// CHECK1-NEXT: store i64 [[TMP14]], i64* [[K_ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[K1]], align 8 +// CHECK1-NEXT: store i64 [[TMP19]], i64* [[K]], align 8 // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: // CHECK1-NEXT: ret void @@ -821,138 +829,139 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[LIN_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[LIN]], i64* [[LIN_ADDR]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP3]], i64 [[TMP5]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK1-NEXT: store i16 [[TMP1]], i16* [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[LIN:%.*]], i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTLINEAR_START3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTLINEAR_START1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTLINEAR_STEP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[IT:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[LIN4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[A5:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[LIN2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[LIN]], i64* [[LIN_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 4 +// CHECK1-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[LIN]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[LIN]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTLINEAR_START]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTLINEAR_START1]], align 4 // CHECK1-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() // CHECK1-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 // CHECK1-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) -// CHECK1-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP6]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK1-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK1-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 -// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 -// CHECK1-NEXT: [[MUL8:%.*]] = mul i64 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: [[ADD:%.*]] = add i64 [[CONV7]], [[MUL8]] -// CHECK1-NEXT: [[CONV9:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK1-NEXT: store i32 [[CONV9]], i32* [[LIN4]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START3]], align 4 -// CHECK1-NEXT: [[CONV10:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 -// CHECK1-NEXT: [[MUL11:%.*]] = mul i64 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] -// CHECK1-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 -// CHECK1-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK1-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 -// CHECK1-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK1-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK1-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] +// CHECK1-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK1-NEXT: store i32 [[CONV6]], i32* [[LIN2]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4 +// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK1-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] +// CHECK1-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 +// CHECK1-NEXT: store i32 [[CONV10]], i32* [[A3]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK1-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 +// CHECK1-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 +// CHECK1-NEXT: store i16 [[CONV13]], i16* [[AA]], align 2 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[ADD17:%.*]] = add i64 [[TMP17]], 1 -// CHECK1-NEXT: store i64 [[ADD17]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 +// CHECK1-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK1-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[LIN2]], align 4 +// CHECK1-NEXT: store i32 [[TMP27]], i32* [[LIN]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[A3]], align 4 +// CHECK1-NEXT: store i32 [[TMP28]], i32* [[A]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: // CHECK1-NEXT: ret void @@ -995,7 +1004,7 @@ // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i16*, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca [3 x i8*]*, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca [3 x i8*]*, align 8 @@ -1013,7 +1022,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.2* // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -1026,8 +1035,8 @@ // CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !24 // CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !24 // CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !24 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 // CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !24 // CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !24 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @@ -1039,8 +1048,8 @@ // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i64 0, i64 0 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP18]], i64 0, i64 0 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP12]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP25:%.*]] = call i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l138.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* [[TMP22]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0, i32 0, i8* null, i32 0, i8* null) #[[ATTR4]] // CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK1-NEXT: br i1 [[TMP26]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__3_EXIT:%.*]] @@ -1068,31 +1077,29 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK1-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1102,62 +1109,66 @@ // CHECK1-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 3, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 -// CHECK1-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK1-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK1-NEXT: store i16 [[CONV]], i16* [[IT]], align 2 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 +// CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 +// CHECK1-NEXT: store i16 [[CONV5]], i16* [[AA]], align 2 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) // CHECK1-NEXT: ret void // // @@ -1174,8 +1185,7 @@ // CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -1196,33 +1206,40 @@ // CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 -// CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 10, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, [10 x float]*, i64, float*, [5 x [10 x double]]*, i64, i64, double*, %struct.TT*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP9]], [10 x float]* [[TMP0]], i64 [[TMP1]], float* [[TMP2]], [5 x [10 x double]]* [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], double* [[TMP6]], %struct.TT* [[TMP7]], i64 [[TMP11]]) +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [10 x float]* [[TMP0]], [10 x float]** [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store float* [[TMP2]], float** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store [5 x [10 x double]]* [[TMP3]], [5 x [10 x double]]** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK1-NEXT: store i64 [[TMP5]], i64* [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK1-NEXT: store double* [[TMP6]], double** [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK1-NEXT: store %struct.TT* [[TMP7]], %struct.TT** [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 10 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV5]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1232,124 +1249,128 @@ // CHECK1-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8 -// CHECK1-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8 -// CHECK1-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load [10 x float]*, [10 x float]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load double*, double** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 9 +// CHECK1-NEXT: [[TMP18:%.*]] = load %struct.TT*, %struct.TT** [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 10 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 8 +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] -// CHECK1-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 -// CHECK1-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double -// CHECK1-NEXT: [[ADD10:%.*]] = fadd double [[CONV9]], 1.000000e+00 -// CHECK1-NEXT: [[CONV11:%.*]] = fptrunc double [[ADD10]] to float -// CHECK1-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK1-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4 -// CHECK1-NEXT: [[CONV13:%.*]] = fpext float [[TMP21]] to double -// CHECK1-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.000000e+00 -// CHECK1-NEXT: [[CONV15:%.*]] = fptrunc double [[ADD14]] to float -// CHECK1-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4 -// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX16]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8 -// CHECK1-NEXT: [[ADD18:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK1-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP23]] -// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX19]], i64 3 -// CHECK1-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8 -// CHECK1-NEXT: [[ADD21:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8 -// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8 -// CHECK1-NEXT: [[ADD22:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK1-NEXT: store i64 [[ADD22]], i64* [[X]], align 8 -// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8 -// CHECK1-NEXT: [[CONV23:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[CONV23]], 1 -// CHECK1-NEXT: [[CONV25:%.*]] = trunc i32 [[ADD24]] to i8 -// CHECK1-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 +// CHECK1-NEXT: store i8 [[CONV]], i8* [[IT]], align 1 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP33:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK1-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK1-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK1-NEXT: store float [[CONV5]], float* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 3 +// CHECK1-NEXT: [[TMP34:%.*]] = load float, float* [[ARRAYIDX6]], align 4 +// CHECK1-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double +// CHECK1-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 +// CHECK1-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float +// CHECK1-NEXT: store float [[CONV9]], float* [[ARRAYIDX6]], align 4 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP10]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX10]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP35:%.*]] = load double, double* [[ARRAYIDX11]], align 8 +// CHECK1-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD12]], double* [[ARRAYIDX11]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP16]], i64 [[TMP36]] +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX13]], i64 3 +// CHECK1-NEXT: [[TMP37:%.*]] = load double, double* [[ARRAYIDX14]], align 8 +// CHECK1-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8 +// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP38:%.*]] = load i64, i64* [[X]], align 8 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK1-NEXT: store i64 [[ADD16]], i64* [[X]], align 8 +// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP39:%.*]] = load i8, i8* [[Y]], align 8 +// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK1-NEXT: store i8 [[CONV19]], i8* [[Y]], align 8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK1-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK1-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK1-NEXT: store i32 [[ADD27]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK1-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK1-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK1-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) // CHECK1-NEXT: ret void // // @@ -1654,7 +1675,7 @@ // CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -1665,24 +1686,28 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.S1* [[TMP0]], %struct.S1** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store i16* [[TMP3]], i16** [[TMP9]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -1692,73 +1717,76 @@ // CHECK1-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i16*, i16** [[TMP9]], align 8 // CHECK1-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP6]], 3 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP13]], 3 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP8]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP18]], 400 // CHECK1-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK1-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double -// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[B]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double +// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double [[ADD]], double* [[A]], align 8 -// CHECK1-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load double, double* [[A5]], align 8 -// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK1-NEXT: store double [[INC]], double* [[A5]], align 8 -// CHECK1-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 -// CHECK1-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP14]] -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 -// CHECK1-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2 +// CHECK1-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = load double, double* [[A2]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK1-NEXT: store double [[INC]], double* [[A2]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = fptosi double [[INC]] to i16 +// CHECK1-NEXT: [[TMP21:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i64 [[TMP21]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// CHECK1-NEXT: store i16 [[CONV3]], i16* [[ARRAYIDX4]], align 2 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[ADD8:%.*]] = add i64 [[TMP15]], 1 -// CHECK1-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD5:%.*]] = add i64 [[TMP22]], 1 +// CHECK1-NEXT: store i64 [[ADD5]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) // CHECK1-NEXT: ret void // // @@ -1769,9 +1797,7 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 @@ -1780,43 +1806,47 @@ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 -// CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* -// CHECK1-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV2]], align 1 +// CHECK1-NEXT: store i8 [[TMP6]], i8* [[TMP5]], align 2 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP7]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 -// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 2 +// CHECK1-NEXT: store i8 [[TMP6]], i8* [[AAA]], align 1 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP7]], align 8 // CHECK1-NEXT: ret void // // @@ -1826,34 +1856,33 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP5]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -1863,67 +1892,71 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK1-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 6, i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP5]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK1-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 -// CHECK1-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK1-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i64 [[TMP12]], 1 -// CHECK1-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 +// CHECK1-NEXT: store i64 [[ADD6]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -1961,7 +1994,7 @@ // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: [[A_CASTED3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_CASTED4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS6:%.*]] = alloca [2 x i8*], align 4 @@ -2037,13 +2070,13 @@ // CHECK3-NEXT: store i8* null, i8** [[TMP29]], align 4 // CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 0 // CHECK3-NEXT: [[TMP33:%.*]] = load i16, i16* [[AA]], align 2 // CHECK3-NEXT: store i16 [[TMP33]], i16* [[TMP32]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 1 // CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[LIN]], align 4 // CHECK3-NEXT: store i32 [[TMP35]], i32* [[TMP34]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 2 // CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[A]], align 4 // CHECK3-NEXT: store i32 [[TMP37]], i32* [[TMP36]], align 4 // CHECK3-NEXT: [[TMP38:%.*]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 72, i32 12, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*), i64 -1) @@ -2051,10 +2084,10 @@ // CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP39]], i32 0, i32 0 // CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP40]], i32 0, i32 0 // CHECK3-NEXT: [[TMP42:%.*]] = load i8*, i8** [[TMP41]], align 4 -// CHECK3-NEXT: [[TMP43:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* +// CHECK3-NEXT: [[TMP43:%.*]] = bitcast %struct.anon.2* [[AGG_CAPTURED]] to i8* // CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP42]], i8* align 4 [[TMP43]], i32 12, i1 false) // CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP39]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP42]] to %struct.anon* +// CHECK3-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP42]] to %struct.anon.2* // CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP44]], i32 0, i32 0 // CHECK3-NEXT: [[TMP47:%.*]] = bitcast [3 x i64]* [[TMP46]] to i8* // CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP47]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false) @@ -2238,15 +2271,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103 // CHECK3-SAME: () #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2256,46 +2291,48 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 5, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK3-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK3-NEXT: br i1 [[TMP9]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK3-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK3-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK3: .cancel.exit: // CHECK3-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK3: .cancel.continue: -// CHECK3-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK3-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK3-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] +// CHECK3-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] // CHECK3: .cancel.exit2: // CHECK3-NEXT: br label [[CANCEL_EXIT]] // CHECK3: .cancel.continue3: @@ -2303,19 +2340,19 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: cancel.cont: // CHECK3-NEXT: ret void // CHECK3: cancel.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK3-NEXT: br label [[CANCEL_CONT]] // // @@ -2324,24 +2361,26 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[K_ADDR:%.*]] = alloca i64*, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i64* [[K]], i64** [[K_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i64*, i64** [[K_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i64*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP2]], i64* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i64* [[TMP0]], i64** [[TMP3]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i64* noundef nonnull align 4 dereferenceable(8) [[K:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[K_ADDR:%.*]] = alloca i64*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTLINEAR_START:%.*]] = alloca i64, align 8 @@ -2350,69 +2389,73 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[K1:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i64* [[K]], i64** [[K_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64*, i64** [[K_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 8 -// CHECK3-NEXT: store i64 [[TMP1]], i64* [[DOTLINEAR_START]], align 8 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK3-NEXT: store i64 [[TMP5]], i64* [[DOTLINEAR_START]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 8, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP3]]) -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 35, i32 0, i32 8, i32 1, i32 1) +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP7]]) +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32 35, i32 0, i32 8, i32 1, i32 1) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP4]], 0 +// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] // CHECK3-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !13 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP10]], 3 -// CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] to i64 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP9]], [[CONV]] -// CHECK3-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !13 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[MUL1:%.*]] = mul nsw i32 [[TMP14]], 3 +// CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[MUL1]] to i64 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP13]], [[CONV]] +// CHECK3-NEXT: store i64 [[ADD]], i64* [[K]], align 8, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !13 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK3: .omp.linear.pu: -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[K1]], align 8 -// CHECK3-NEXT: store i64 [[TMP15]], i64* [[TMP0]], align 8 +// CHECK3-NEXT: [[TMP19:%.*]] = load i64, i64* [[K]], align 8 +// CHECK3-NEXT: store i64 [[TMP19]], i64* [[TMP4]], align 8 // CHECK3-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK3: .omp.linear.pu.done: // CHECK3-NEXT: ret void @@ -2424,35 +2467,33 @@ // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[LIN_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[LIN_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[LIN_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[LIN_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32 [[TMP1]], i32 [[TMP3]], i32 [[TMP5]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK3-NEXT: store i16 [[TMP1]], i16* [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[LIN_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[LIN:%.*]], i32 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK3-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -2467,89 +2508,96 @@ // CHECK3-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[LIN_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START1]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 4 +// CHECK3-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[LIN]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[LIN]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTLINEAR_START1]], align 4 // CHECK3-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() // CHECK3-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 // CHECK3-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK3-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK3-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) -// CHECK3-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]]) +// CHECK3-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK3-NEXT: store i64 [[TMP6]], i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP13]], i64* [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK3-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK3-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK3-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 -// CHECK3-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 -// CHECK3-NEXT: [[MUL6:%.*]] = mul i64 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: [[ADD:%.*]] = add i64 [[CONV5]], [[MUL6]] -// CHECK3-NEXT: [[CONV7:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK3-NEXT: store i32 [[CONV7]], i32* [[LIN2]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4 -// CHECK3-NEXT: [[CONV8:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK3-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 -// CHECK3-NEXT: [[MUL9:%.*]] = mul i64 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] -// CHECK3-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 -// CHECK3-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 -// CHECK3-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK3-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK3-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK3-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] +// CHECK3-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK3-NEXT: store i32 [[CONV6]], i32* [[LIN2]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4 +// CHECK3-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK3-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK3-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] +// CHECK3-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 +// CHECK3-NEXT: store i32 [[CONV10]], i32* [[A3]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK3-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 +// CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 +// CHECK3-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 +// CHECK3-NEXT: store i16 [[CONV13]], i16* [[AA]], align 2 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[ADD15:%.*]] = add i64 [[TMP17]], 1 -// CHECK3-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 +// CHECK3-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK3: .omp.linear.pu: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN2]], align 4 -// CHECK3-NEXT: store i32 [[TMP20]], i32* [[LIN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[A3]], align 4 -// CHECK3-NEXT: store i32 [[TMP21]], i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[LIN2]], align 4 +// CHECK3-NEXT: store i32 [[TMP27]], i32* [[LIN]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[A3]], align 4 +// CHECK3-NEXT: store i32 [[TMP28]], i32* [[A]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK3: .omp.linear.pu.done: // CHECK3-NEXT: ret void @@ -2592,7 +2640,7 @@ // CHECK3-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 4 // CHECK3-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 4 // CHECK3-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.2*, align 4 // CHECK3-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i16*, align 4 // CHECK3-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca [3 x i8*]*, align 4 // CHECK3-NEXT: [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca [3 x i8*]*, align 4 @@ -2610,7 +2658,7 @@ // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.2* // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK3-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -2623,8 +2671,8 @@ // CHECK3-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !25 // CHECK3-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !25 // CHECK3-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !25 -// CHECK3-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !25 -// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !25 +// CHECK3-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 4, !noalias !25 +// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 4, !noalias !25 // CHECK3-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !25 // CHECK3-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !25 // CHECK3-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @@ -2636,8 +2684,8 @@ // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i32 0, i32 0 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP18]], i32 0, i32 0 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP12]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP12]], i32 0, i32 2 // CHECK3-NEXT: [[TMP25:%.*]] = call i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l138.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* [[TMP22]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0, i32 0, i8* null, i32 0, i8* null) #[[ATTR4]] // CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK3-NEXT: br i1 [[TMP26]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__3_EXIT:%.*]] @@ -2663,29 +2711,28 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32 [[TMP1]], i32 [[TMP3]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK3-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2695,61 +2742,66 @@ // CHECK3-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 3, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] -// CHECK3-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK3-NEXT: store i16 [[CONV2]], i16* [[IT]], align 2 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 -// CHECK3-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK3-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK3-NEXT: store i16 [[CONV]], i16* [[IT]], align 2 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 +// CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 +// CHECK3-NEXT: store i16 [[CONV5]], i16* [[AA]], align 2 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) // CHECK3-NEXT: ret void // // @@ -2766,8 +2818,7 @@ // CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -2786,31 +2837,40 @@ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 10, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [10 x float]*, i32, float*, [5 x [10 x double]]*, i32, i32, double*, %struct.TT*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP9]], [10 x float]* [[TMP0]], i32 [[TMP1]], float* [[TMP2]], [5 x [10 x double]]* [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], double* [[TMP6]], %struct.TT* [[TMP7]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store [10 x float]* [[TMP0]], [10 x float]** [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[TMP2]], float** [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store [5 x [10 x double]]* [[TMP3]], [5 x [10 x double]]** [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK3-NEXT: store double* [[TMP6]], double** [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK3-NEXT: store %struct.TT* [[TMP7]], %struct.TT** [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2820,122 +2880,128 @@ // CHECK3-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4 -// CHECK3-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4 -// CHECK3-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load [10 x float]*, [10 x float]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 7 +// CHECK3-NEXT: [[TMP16:%.*]] = load double*, double** [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load %struct.TT*, %struct.TT** [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 9 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK3-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 // CHECK3-NEXT: store i8 [[CONV]], i8* [[IT]], align 1 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP33:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK3-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK3-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK3-NEXT: store float [[CONV5]], float* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP8]], i32 3 +// CHECK3-NEXT: [[TMP34:%.*]] = load float, float* [[ARRAYIDX6]], align 4 +// CHECK3-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double // CHECK3-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK3-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK3-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK3-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4 -// CHECK3-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK3-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK3-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK3-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4 -// CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8 -// CHECK3-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8 -// CHECK3-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK3-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP23]] -// CHECK3-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i32 3 -// CHECK3-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8 -// CHECK3-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8 -// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4 -// CHECK3-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK3-NEXT: store i64 [[ADD20]], i64* [[X]], align 4 -// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4 -// CHECK3-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK3-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK3-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK3-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4 +// CHECK3-NEXT: store float [[CONV9]], float* [[ARRAYIDX6]], align 4 +// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP10]], i32 0, i32 1 +// CHECK3-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX10]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP35:%.*]] = load double, double* [[ARRAYIDX11]], align 8 +// CHECK3-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD12]], double* [[ARRAYIDX11]], align 8 +// CHECK3-NEXT: [[TMP36:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK3-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP16]], i32 [[TMP36]] +// CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX13]], i32 3 +// CHECK3-NEXT: [[TMP37:%.*]] = load double, double* [[ARRAYIDX14]], align 8 +// CHECK3-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8 +// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP18]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP38:%.*]] = load i64, i64* [[X]], align 4 +// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK3-NEXT: store i64 [[ADD16]], i64* [[X]], align 4 +// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP18]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP39:%.*]] = load i8, i8* [[Y]], align 4 +// CHECK3-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK3-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK3-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK3-NEXT: store i8 [[CONV19]], i8* [[Y]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK3-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK3-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK3-NEXT: store i32 [[ADD25]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK3-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK3-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK3-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) // CHECK3-NEXT: ret void // // @@ -3237,7 +3303,7 @@ // CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -3247,23 +3313,28 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.S1* [[TMP0]], %struct.S1** [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store i16* [[TMP3]], i16** [[TMP9]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -3273,72 +3344,76 @@ // CHECK3-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i16*, i16** [[TMP9]], align 4 // CHECK3-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK3-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK3-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP6]], 3 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP13]], 3 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK3-NEXT: store i64 [[TMP8]], i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP15]], i64* [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 +// CHECK3-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[MUL:%.*]] = mul i64 [[TMP18]], 400 // CHECK3-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK3-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[B]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double // CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double [[ADD]], double* [[A]], align 4 -// CHECK3-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load double, double* [[A4]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK3-NEXT: store double [[INC]], double* [[A4]], align 4 -// CHECK3-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK3-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP14]] -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 -// CHECK3-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2 +// CHECK3-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP20:%.*]] = load double, double* [[A2]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK3-NEXT: store double [[INC]], double* [[A2]], align 4 +// CHECK3-NEXT: [[CONV3:%.*]] = fptosi double [[INC]] to i16 +// CHECK3-NEXT: [[TMP21:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i32 [[TMP21]] +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// CHECK3-NEXT: store i16 [[CONV3]], i16* [[ARRAYIDX4]], align 2 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[ADD7:%.*]] = add i64 [[TMP15]], 1 -// CHECK3-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[ADD5:%.*]] = add i64 [[TMP22]], 1 +// CHECK3-NEXT: store i64 [[ADD5]], i64* [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) // CHECK3-NEXT: ret void // // @@ -3349,9 +3424,7 @@ // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 @@ -3359,41 +3432,47 @@ // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* // CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 -// CHECK3-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* -// CHECK3-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV1]], align 1 +// CHECK3-NEXT: store i8 [[TMP6]], i8* [[TMP5]], align 2 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP7]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 -// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 2 +// CHECK3-NEXT: store i8 [[TMP6]], i8* [[AAA]], align 1 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -3403,32 +3482,32 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP5]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -3438,66 +3517,71 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK3-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK3-NEXT: store i64 6, i64* [[DOTOMP_UB]], align 8 // CHECK3-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK3-NEXT: store i64 [[TMP5]], i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK3-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK3-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 -// CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK3-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i64 [[TMP12]], 1 -// CHECK3-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 +// CHECK3-NEXT: store i64 [[ADD6]], i64* [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK3-NEXT: ret void // // @@ -3511,15 +3595,17 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103 // CHECK9-SAME: () #[[ATTR0:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3529,46 +3615,48 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 5, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK9-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK9-NEXT: br i1 [[TMP9]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK9-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK9-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK9-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK9: .cancel.exit: // CHECK9-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK9: .cancel.continue: -// CHECK9-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK9-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK9-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] +// CHECK9-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] // CHECK9: .cancel.exit2: // CHECK9-NEXT: br label [[CANCEL_EXIT]] // CHECK9: .cancel.continue3: @@ -3576,19 +3664,19 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: cancel.cont: // CHECK9-NEXT: ret void // CHECK9: cancel.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: br label [[CANCEL_CONT]] // // @@ -3598,138 +3686,139 @@ // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[LIN_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: store i64 [[LIN]], i64* [[LIN_ADDR]], align 8 // CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK9-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP3]], i64 [[TMP5]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK9-NEXT: store i16 [[TMP1]], i16* [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[LIN:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK9-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTLINEAR_START3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTLINEAR_START1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTLINEAR_STEP:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[IT:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[LIN4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[A5:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[LIN2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[LIN]], i64* [[LIN_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 4 +// CHECK9-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[LIN]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], i32* [[A]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[LIN]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTLINEAR_START]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK9-NEXT: store i32 [[TMP8]], i32* [[DOTLINEAR_START1]], align 4 // CHECK9-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() #[[ATTR5:[0-9]+]] // CHECK9-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 // CHECK9-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK9-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK9-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP3]]) -// CHECK9-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP10]]) +// CHECK9-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP6]], i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP13]], i64* [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] -// CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK9-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK9-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 -// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 -// CHECK9-NEXT: [[MUL8:%.*]] = mul i64 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: [[ADD:%.*]] = add i64 [[CONV7]], [[MUL8]] -// CHECK9-NEXT: [[CONV9:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK9-NEXT: store i32 [[CONV9]], i32* [[LIN4]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START3]], align 4 -// CHECK9-NEXT: [[CONV10:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 -// CHECK9-NEXT: [[MUL11:%.*]] = mul i64 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] -// CHECK9-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 -// CHECK9-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK9-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK9-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 -// CHECK9-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK9-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK9-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] +// CHECK9-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK9-NEXT: store i32 [[CONV6]], i32* [[LIN2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4 +// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK9-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] +// CHECK9-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 +// CHECK9-NEXT: store i32 [[CONV10]], i32* [[A3]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK9-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 +// CHECK9-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 +// CHECK9-NEXT: store i16 [[CONV13]], i16* [[AA]], align 2 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[ADD17:%.*]] = add i64 [[TMP17]], 1 -// CHECK9-NEXT: store i64 [[ADD17]], i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 +// CHECK9-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK9-NEXT: br i1 [[TMP19]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK9: .omp.linear.pu: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK9-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK9-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[LIN2]], align 4 +// CHECK9-NEXT: store i32 [[TMP27]], i32* [[LIN]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[A3]], align 4 +// CHECK9-NEXT: store i32 [[TMP28]], i32* [[A]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK9: .omp.linear.pu.done: // CHECK9-NEXT: ret void @@ -3746,31 +3835,29 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK9-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP3]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK9-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3780,62 +3867,66 @@ // CHECK9-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 3, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK9-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 -// CHECK9-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK9-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK9-NEXT: store i16 [[CONV]], i16* [[IT]], align 2 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[A]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK9-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 +// CHECK9-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 +// CHECK9-NEXT: store i16 [[CONV5]], i16* [[AA]], align 2 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) // CHECK9-NEXT: ret void // // @@ -3852,8 +3943,7 @@ // CHECK9-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 // CHECK9-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK9-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -3874,33 +3964,40 @@ // CHECK9-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 -// CHECK9-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 10, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, [10 x float]*, i64, float*, [5 x [10 x double]]*, i64, i64, double*, %struct.TT*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], [10 x float]* [[TMP0]], i64 [[TMP1]], float* [[TMP2]], [5 x [10 x double]]* [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], double* [[TMP6]], %struct.TT* [[TMP7]], i64 [[TMP11]]) +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [10 x float]* [[TMP0]], [10 x float]** [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP1]], i64* [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store float* [[TMP2]], float** [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store [5 x [10 x double]]* [[TMP3]], [5 x [10 x double]]** [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK9-NEXT: store i64 [[TMP5]], i64* [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK9-NEXT: store double* [[TMP6]], double** [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK9-NEXT: store %struct.TT* [[TMP7]], %struct.TT** [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 10 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV5]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 -// CHECK9-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3910,124 +4007,128 @@ // CHECK9-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK9-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8 -// CHECK9-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8 -// CHECK9-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8 -// CHECK9-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP4:%.*]] = load [10 x float]*, [10 x float]** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP10:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 7 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 8 +// CHECK9-NEXT: [[TMP16:%.*]] = load double*, double** [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 9 +// CHECK9-NEXT: [[TMP18:%.*]] = load %struct.TT*, %struct.TT** [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 10 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 8 +// CHECK9-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] -// CHECK9-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 -// CHECK9-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double -// CHECK9-NEXT: [[ADD10:%.*]] = fadd double [[CONV9]], 1.000000e+00 -// CHECK9-NEXT: [[CONV11:%.*]] = fptrunc double [[ADD10]] to float -// CHECK9-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK9-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4 -// CHECK9-NEXT: [[CONV13:%.*]] = fpext float [[TMP21]] to double -// CHECK9-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.000000e+00 -// CHECK9-NEXT: [[CONV15:%.*]] = fptrunc double [[ADD14]] to float -// CHECK9-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4 -// CHECK9-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 -// CHECK9-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX16]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8 -// CHECK9-NEXT: [[ADD18:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK9-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK9-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP23]] -// CHECK9-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX19]], i64 3 -// CHECK9-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8 -// CHECK9-NEXT: [[ADD21:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK9-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8 -// CHECK9-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8 -// CHECK9-NEXT: [[ADD22:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK9-NEXT: store i64 [[ADD22]], i64* [[X]], align 8 -// CHECK9-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8 -// CHECK9-NEXT: [[CONV23:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK9-NEXT: [[ADD24:%.*]] = add nsw i32 [[CONV23]], 1 -// CHECK9-NEXT: [[CONV25:%.*]] = trunc i32 [[ADD24]] to i8 -// CHECK9-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 +// CHECK9-NEXT: store i8 [[CONV]], i8* [[IT]], align 1 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP33:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK9-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK9-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK9-NEXT: store float [[CONV5]], float* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 3 +// CHECK9-NEXT: [[TMP34:%.*]] = load float, float* [[ARRAYIDX6]], align 4 +// CHECK9-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double +// CHECK9-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 +// CHECK9-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float +// CHECK9-NEXT: store float [[CONV9]], float* [[ARRAYIDX6]], align 4 +// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP10]], i64 0, i64 1 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX10]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP35:%.*]] = load double, double* [[ARRAYIDX11]], align 8 +// CHECK9-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK9-NEXT: store double [[ADD12]], double* [[ARRAYIDX11]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK9-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP16]], i64 [[TMP36]] +// CHECK9-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX13]], i64 3 +// CHECK9-NEXT: [[TMP37:%.*]] = load double, double* [[ARRAYIDX14]], align 8 +// CHECK9-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK9-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8 +// CHECK9-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP18]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i64, i64* [[X]], align 8 +// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK9-NEXT: store i64 [[ADD16]], i64* [[X]], align 8 +// CHECK9-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP18]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP39:%.*]] = load i8, i8* [[Y]], align 8 +// CHECK9-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK9-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK9-NEXT: store i8 [[CONV19]], i8* [[Y]], align 8 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK9-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK9-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD27:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK9-NEXT: store i32 [[ADD27]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK9-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK9-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK9-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) // CHECK9-NEXT: ret void // // @@ -4038,9 +4139,7 @@ // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 @@ -4049,43 +4148,47 @@ // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK9-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 -// CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* -// CHECK9-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK9-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV2]], align 1 +// CHECK9-NEXT: store i8 [[TMP6]], i8* [[TMP5]], align 2 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP7]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK9-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 2 +// CHECK9-NEXT: store i8 [[TMP6]], i8* [[AAA]], align 1 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP7]], align 8 // CHECK9-NEXT: ret void // // @@ -4097,7 +4200,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK9-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -4108,24 +4211,28 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]]) +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store %struct.S1* [[TMP0]], %struct.S1** [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], i64* [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP2]], i64* [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i16* [[TMP3]], i16** [[TMP9]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 +// CHECK9-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -4135,73 +4242,76 @@ // CHECK9-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i16*, i16** [[TMP9]], align 8 // CHECK9-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK9-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK9-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP6]], 3 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP13]], 3 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP8]], i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP15]], i64* [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[MUL:%.*]] = mul i64 [[TMP18]], 400 // CHECK9-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK9-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double -// CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[B]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double +// CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK9-NEXT: store double [[ADD]], double* [[A]], align 8 -// CHECK9-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP13:%.*]] = load double, double* [[A5]], align 8 -// CHECK9-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK9-NEXT: store double [[INC]], double* [[A5]], align 8 -// CHECK9-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 -// CHECK9-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP14]] -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 -// CHECK9-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2 +// CHECK9-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP20:%.*]] = load double, double* [[A2]], align 8 +// CHECK9-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK9-NEXT: store double [[INC]], double* [[A2]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = fptosi double [[INC]] to i16 +// CHECK9-NEXT: [[TMP21:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i64 [[TMP21]] +// CHECK9-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// CHECK9-NEXT: store i16 [[CONV3]], i16* [[ARRAYIDX4]], align 2 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[ADD8:%.*]] = add i64 [[TMP15]], 1 -// CHECK9-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[ADD5:%.*]] = add i64 [[TMP22]], 1 +// CHECK9-NEXT: store i64 [[ADD5]], i64* [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) // CHECK9-NEXT: ret void // // @@ -4211,34 +4321,33 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK9-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK9-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP5]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -4248,82 +4357,88 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK9-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK9-NEXT: store i64 6, i64* [[DOTOMP_UB]], align 8 // CHECK9-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP5]], i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK9-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 -// CHECK9-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK9-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK9-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK9-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i64 [[TMP12]], 1 -// CHECK9-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 +// CHECK9-NEXT: store i64 [[ADD6]], i64* [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103 // CHECK11-SAME: () #[[ATTR0:[0-9]+]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4333,46 +4448,48 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 5, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK11-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK11-NEXT: br i1 [[TMP9]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK11-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK11-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK11-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK11: .cancel.exit: // CHECK11-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK11: .cancel.continue: -// CHECK11-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK11-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK11-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] +// CHECK11-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] // CHECK11: .cancel.exit2: // CHECK11-NEXT: br label [[CANCEL_EXIT]] // CHECK11: .cancel.continue3: @@ -4380,19 +4497,19 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK11-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK11: cancel.cont: // CHECK11-NEXT: ret void // CHECK11: cancel.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK11-NEXT: br label [[CANCEL_CONT]] // // @@ -4402,35 +4519,33 @@ // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[LIN_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK11-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK11-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[LIN_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[LIN_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[LIN_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP1]], i32 [[TMP3]], i32 [[TMP5]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK11-NEXT: store i16 [[TMP1]], i16* [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[LIN_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[LIN:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK11-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK11-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -4445,89 +4560,96 @@ // CHECK11-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[LIN_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START1]], align 4 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 4 +// CHECK11-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[LIN]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[A]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[LIN]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTLINEAR_START]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTLINEAR_START1]], align 4 // CHECK11-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() #[[ATTR5:[0-9]+]] // CHECK11-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 // CHECK11-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK11-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP3]]) -// CHECK11-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP10]]) +// CHECK11-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP6]], i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP13]], i64* [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK11-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK11-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK11-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 -// CHECK11-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 -// CHECK11-NEXT: [[MUL6:%.*]] = mul i64 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: [[ADD:%.*]] = add i64 [[CONV5]], [[MUL6]] -// CHECK11-NEXT: [[CONV7:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK11-NEXT: store i32 [[CONV7]], i32* [[LIN2]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 -// CHECK11-NEXT: [[MUL9:%.*]] = mul i64 [[TMP14]], [[TMP15]] -// CHECK11-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] -// CHECK11-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 -// CHECK11-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 -// CHECK11-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK11-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK11-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK11-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] +// CHECK11-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK11-NEXT: store i32 [[CONV6]], i32* [[LIN2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4 +// CHECK11-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK11-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] +// CHECK11-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 +// CHECK11-NEXT: store i32 [[CONV10]], i32* [[A3]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK11-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 +// CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 +// CHECK11-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 +// CHECK11-NEXT: store i16 [[CONV13]], i16* [[AA]], align 2 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[ADD15:%.*]] = add i64 [[TMP17]], 1 -// CHECK11-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 +// CHECK11-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK11-NEXT: br i1 [[TMP19]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK11: .omp.linear.pu: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN2]], align 4 -// CHECK11-NEXT: store i32 [[TMP20]], i32* [[LIN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[A3]], align 4 -// CHECK11-NEXT: store i32 [[TMP21]], i32* [[A_ADDR]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[LIN2]], align 4 +// CHECK11-NEXT: store i32 [[TMP27]], i32* [[LIN]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[A3]], align 4 +// CHECK11-NEXT: store i32 [[TMP28]], i32* [[A]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK11: .omp.linear.pu.done: // CHECK11-NEXT: ret void @@ -4544,29 +4666,28 @@ // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK11-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32 [[TMP1]], i32 [[TMP3]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK11-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4576,61 +4697,66 @@ // CHECK11-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 3, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] -// CHECK11-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK11-NEXT: store i16 [[CONV2]], i16* [[IT]], align 2 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 -// CHECK11-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK11-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK11-NEXT: store i16 [[CONV]], i16* [[IT]], align 2 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[A]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK11-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK11-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 +// CHECK11-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 +// CHECK11-NEXT: store i16 [[CONV5]], i16* [[AA]], align 2 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) // CHECK11-NEXT: ret void // // @@ -4647,8 +4773,7 @@ // CHECK11-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 // CHECK11-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -4667,31 +4792,40 @@ // CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 10, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [10 x float]*, i32, float*, [5 x [10 x double]]*, i32, i32, double*, %struct.TT*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP9]], [10 x float]* [[TMP0]], i32 [[TMP1]], float* [[TMP2]], [5 x [10 x double]]* [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], double* [[TMP6]], %struct.TT* [[TMP7]], i32 [[TMP11]]) +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store [10 x float]* [[TMP0]], [10 x float]** [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP1]], i32* [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store float* [[TMP2]], float** [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store [5 x [10 x double]]* [[TMP3]], [5 x [10 x double]]** [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK11-NEXT: store double* [[TMP6]], double** [[TMP16]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK11-NEXT: store %struct.TT* [[TMP7]], %struct.TT** [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 -// CHECK11-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4701,122 +4835,128 @@ // CHECK11-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4 -// CHECK11-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4 -// CHECK11-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4 -// CHECK11-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load [10 x float]*, [10 x float]** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 7 +// CHECK11-NEXT: [[TMP16:%.*]] = load double*, double** [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load %struct.TT*, %struct.TT** [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 9 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK11-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 // CHECK11-NEXT: store i8 [[CONV]], i8* [[IT]], align 1 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP33:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK11-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK11-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK11-NEXT: store float [[CONV5]], float* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP8]], i32 3 +// CHECK11-NEXT: [[TMP34:%.*]] = load float, float* [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double // CHECK11-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK11-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK11-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK11-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4 -// CHECK11-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK11-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK11-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK11-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4 -// CHECK11-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 -// CHECK11-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8 -// CHECK11-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK11-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8 -// CHECK11-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK11-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP23]] -// CHECK11-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i32 3 -// CHECK11-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8 -// CHECK11-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK11-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8 -// CHECK11-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4 -// CHECK11-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK11-NEXT: store i64 [[ADD20]], i64* [[X]], align 4 -// CHECK11-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK11-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4 -// CHECK11-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK11-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK11-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK11-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4 +// CHECK11-NEXT: store float [[CONV9]], float* [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP10]], i32 0, i32 1 +// CHECK11-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX10]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP35:%.*]] = load double, double* [[ARRAYIDX11]], align 8 +// CHECK11-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK11-NEXT: store double [[ADD12]], double* [[ARRAYIDX11]], align 8 +// CHECK11-NEXT: [[TMP36:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK11-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP16]], i32 [[TMP36]] +// CHECK11-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX13]], i32 3 +// CHECK11-NEXT: [[TMP37:%.*]] = load double, double* [[ARRAYIDX14]], align 8 +// CHECK11-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK11-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8 +// CHECK11-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP18]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP38:%.*]] = load i64, i64* [[X]], align 4 +// CHECK11-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK11-NEXT: store i64 [[ADD16]], i64* [[X]], align 4 +// CHECK11-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP18]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP39:%.*]] = load i8, i8* [[Y]], align 4 +// CHECK11-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK11-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK11-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK11-NEXT: store i8 [[CONV19]], i8* [[Y]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK11-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK11-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK11-NEXT: store i32 [[ADD25]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK11-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK11-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK11-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) // CHECK11-NEXT: ret void // // @@ -4827,9 +4967,7 @@ // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 @@ -4837,41 +4975,47 @@ // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK11-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 -// CHECK11-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* -// CHECK11-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK11-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV1]], align 1 +// CHECK11-NEXT: store i8 [[TMP6]], i8* [[TMP5]], align 2 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP7]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK11-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 2 +// CHECK11-NEXT: store i8 [[TMP6]], i8* [[AAA]], align 1 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP7]], align 4 // CHECK11-NEXT: ret void // // @@ -4883,7 +5027,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -4893,23 +5037,28 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]]) +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store %struct.S1* [[TMP0]], %struct.S1** [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP1]], i32* [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i16* [[TMP3]], i16** [[TMP9]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -4919,72 +5068,76 @@ // CHECK11-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i16*, i16** [[TMP9]], align 4 // CHECK11-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK11-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP6]], 3 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP13]], 3 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP8]], i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP15]], i64* [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 +// CHECK11-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[MUL:%.*]] = mul i64 [[TMP18]], 400 // CHECK11-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK11-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[B]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double // CHECK11-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK11-NEXT: store double [[ADD]], double* [[A]], align 4 -// CHECK11-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP13:%.*]] = load double, double* [[A4]], align 4 -// CHECK11-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK11-NEXT: store double [[INC]], double* [[A4]], align 4 -// CHECK11-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK11-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP14]] -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 -// CHECK11-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2 +// CHECK11-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP20:%.*]] = load double, double* [[A2]], align 4 +// CHECK11-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK11-NEXT: store double [[INC]], double* [[A2]], align 4 +// CHECK11-NEXT: [[CONV3:%.*]] = fptosi double [[INC]] to i16 +// CHECK11-NEXT: [[TMP21:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i32 [[TMP21]] +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// CHECK11-NEXT: store i16 [[CONV3]], i16* [[ARRAYIDX4]], align 2 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[ADD7:%.*]] = add i64 [[TMP15]], 1 -// CHECK11-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[ADD5:%.*]] = add i64 [[TMP22]], 1 +// CHECK11-NEXT: store i64 [[ADD5]], i64* [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) // CHECK11-NEXT: ret void // // @@ -4994,32 +5147,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK11-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK11-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK11-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP5]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -5029,66 +5182,71 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK11-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK11-NEXT: store i64 6, i64* [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP5]], i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK11-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK11-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK11-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 -// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 -// CHECK11-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK11-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK11-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK11-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK11-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i64 [[TMP12]], 1 -// CHECK11-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 +// CHECK11-NEXT: store i64 [[ADD6]], i64* [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK11-NEXT: ret void // // @@ -5120,7 +5278,7 @@ // CHECK17-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8 // CHECK17-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8 // CHECK17-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8 -// CHECK17-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK17-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK17-NEXT: [[A_CASTED6:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[AA_CASTED8:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [2 x i8*], align 8 @@ -5204,13 +5362,13 @@ // CHECK17-NEXT: store i8* null, i8** [[TMP33]], align 8 // CHECK17-NEXT: [[TMP34:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK17-NEXT: [[TMP35:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 0 // CHECK17-NEXT: [[TMP37:%.*]] = load i16, i16* [[AA]], align 2 // CHECK17-NEXT: store i16 [[TMP37]], i16* [[TMP36]], align 4 -// CHECK17-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 1 // CHECK17-NEXT: [[TMP39:%.*]] = load i32, i32* [[LIN]], align 4 // CHECK17-NEXT: store i32 [[TMP39]], i32* [[TMP38]], align 4 -// CHECK17-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 2 // CHECK17-NEXT: [[TMP41:%.*]] = load i32, i32* [[A]], align 4 // CHECK17-NEXT: store i32 [[TMP41]], i32* [[TMP40]], align 4 // CHECK17-NEXT: [[TMP42:%.*]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i64 120, i64 12, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*), i64 -1) @@ -5218,10 +5376,10 @@ // CHECK17-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP43]], i32 0, i32 0 // CHECK17-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP44]], i32 0, i32 0 // CHECK17-NEXT: [[TMP46:%.*]] = load i8*, i8** [[TMP45]], align 8 -// CHECK17-NEXT: [[TMP47:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* +// CHECK17-NEXT: [[TMP47:%.*]] = bitcast %struct.anon.2* [[AGG_CAPTURED]] to i8* // CHECK17-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP46]], i8* align 4 [[TMP47]], i64 12, i1 false) // CHECK17-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP43]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP49:%.*]] = bitcast i8* [[TMP46]] to %struct.anon* +// CHECK17-NEXT: [[TMP49:%.*]] = bitcast i8* [[TMP46]] to %struct.anon.2* // CHECK17-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP48]], i32 0, i32 0 // CHECK17-NEXT: [[TMP51:%.*]] = bitcast [3 x i8*]* [[TMP50]] to i8* // CHECK17-NEXT: [[TMP52:%.*]] = bitcast i8** [[TMP34]] to i8* @@ -5406,15 +5564,17 @@ // CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103 // CHECK17-SAME: () #[[ATTR2:[0-9]+]] { // CHECK17-NEXT: entry: -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5424,46 +5584,48 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 5, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK17-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK17-NEXT: br i1 [[TMP9]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK17-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK17-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK17-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK17: .cancel.exit: // CHECK17-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK17: .cancel.continue: -// CHECK17-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK17-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK17-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] +// CHECK17-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK17-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK17-NEXT: br i1 [[TMP12]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] // CHECK17: .cancel.exit2: // CHECK17-NEXT: br label [[CANCEL_EXIT]] // CHECK17: .cancel.continue3: @@ -5471,19 +5633,19 @@ // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK17-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK17-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK17: cancel.cont: // CHECK17-NEXT: ret void // CHECK17: cancel.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK17-NEXT: br label [[CANCEL_CONT]] // // @@ -5492,29 +5654,28 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[K_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[K_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[K]], i64* [[K_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[K_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[TMP2]], i64* [[K_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[K_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP3]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK17-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[K_ADDR]], align 8 +// CHECK17-NEXT: store i64 [[TMP3]], i64* [[TMP2]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[K:%.*]]) #[[ATTR3]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[K_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTLINEAR_START:%.*]] = alloca i64, align 8 @@ -5526,66 +5687,71 @@ // CHECK17-NEXT: [[K1:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[K]], i64* [[K_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[K_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[TMP0]], i64* [[DOTLINEAR_START]], align 8 +// CHECK17-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[K]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[K]], align 8 +// CHECK17-NEXT: store i64 [[TMP5]], i64* [[DOTLINEAR_START]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 8, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP2]]) -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1073741859, i32 0, i32 8, i32 1, i32 1) +// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK17-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP7]]) +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32 1073741859, i32 0, i32 8, i32 1, i32 1) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK17-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] // CHECK17-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !12 -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !12 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK17-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP9]], 3 -// CHECK17-NEXT: [[CONV3:%.*]] = sext i32 [[MUL2]] to i64 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV3]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !12 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK17-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP14]], 3 +// CHECK17-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] to i64 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP13]], [[CONV]] // CHECK17-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !12 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !12 -// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK17-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !12 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !12 +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: store i32 [[ADD3]], i32* [[A]], align 4, !llvm.access.group !12 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK17-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK17-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK17-NEXT: br i1 [[TMP13]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK17-NEXT: br i1 [[TMP18]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK17: .omp.linear.pu: -// CHECK17-NEXT: [[TMP14:%.*]] = load i64, i64* [[K1]], align 8 -// CHECK17-NEXT: store i64 [[TMP14]], i64* [[K_ADDR]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = load i64, i64* [[K1]], align 8 +// CHECK17-NEXT: store i64 [[TMP19]], i64* [[K]], align 8 // CHECK17-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK17: .omp.linear.pu.done: // CHECK17-NEXT: ret void @@ -5597,138 +5763,139 @@ // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[LIN_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: store i64 [[LIN]], i64* [[LIN_ADDR]], align 8 // CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK17-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 -// CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP3]], i64 [[TMP5]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK17-NEXT: store i16 [[TMP1]], i16* [[TMP0]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 +// CHECK17-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[LIN:%.*]], i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK17-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTLINEAR_START3:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTLINEAR_START1:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTLINEAR_STEP:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[IT:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[LIN4:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A5:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[LIN2:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[LIN]], i64* [[LIN_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* -// CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK17-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 +// CHECK17-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 4 +// CHECK17-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], i32* [[LIN]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK17-NEXT: store i32 [[TMP6]], i32* [[A]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[LIN]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTLINEAR_START]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTLINEAR_START1]], align 4 // CHECK17-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() // CHECK17-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 // CHECK17-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK17-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK17-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK17-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) -// CHECK17-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]]) +// CHECK17-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK17-NEXT: store i64 [[TMP6]], i64* [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP13]], i64* [[DOTOMP_IV]], align 8 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] -// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK17-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK17-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 -// CHECK17-NEXT: [[CONV7:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 -// CHECK17-NEXT: [[MUL8:%.*]] = mul i64 [[TMP11]], [[TMP12]] -// CHECK17-NEXT: [[ADD:%.*]] = add i64 [[CONV7]], [[MUL8]] -// CHECK17-NEXT: [[CONV9:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK17-NEXT: store i32 [[CONV9]], i32* [[LIN4]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START3]], align 4 -// CHECK17-NEXT: [[CONV10:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK17-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 -// CHECK17-NEXT: [[MUL11:%.*]] = mul i64 [[TMP14]], [[TMP15]] -// CHECK17-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] -// CHECK17-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 -// CHECK17-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK17-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK17-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 -// CHECK17-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK17-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 +// CHECK17-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK17-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] +// CHECK17-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK17-NEXT: store i32 [[CONV6]], i32* [[LIN2]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4 +// CHECK17-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK17-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK17-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] +// CHECK17-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] +// CHECK17-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 +// CHECK17-NEXT: store i32 [[CONV10]], i32* [[A3]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK17-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 +// CHECK17-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 +// CHECK17-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 +// CHECK17-NEXT: store i16 [[CONV13]], i16* [[AA]], align 2 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[ADD17:%.*]] = add i64 [[TMP17]], 1 -// CHECK17-NEXT: store i64 [[ADD17]], i64* [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 +// CHECK17-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_IV]], align 8 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK17-NEXT: br i1 [[TMP19]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK17-NEXT: br i1 [[TMP26]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK17: .omp.linear.pu: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK17-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK17-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[LIN2]], align 4 +// CHECK17-NEXT: store i32 [[TMP27]], i32* [[LIN]], align 4 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[A3]], align 4 +// CHECK17-NEXT: store i32 [[TMP28]], i32* [[A]], align 4 // CHECK17-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK17: .omp.linear.pu.done: // CHECK17-NEXT: ret void @@ -5771,7 +5938,7 @@ // CHECK17-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK17-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK17-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK17-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.2*, align 8 // CHECK17-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i16*, align 8 // CHECK17-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca [3 x i8*]*, align 8 // CHECK17-NEXT: [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca [3 x i8*]*, align 8 @@ -5789,7 +5956,7 @@ // CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK17-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK17-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.2* // CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK17-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK17-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -5802,8 +5969,8 @@ // CHECK17-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !24 // CHECK17-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !24 // CHECK17-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !24 -// CHECK17-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 -// CHECK17-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK17-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK17-NEXT: [[TMP12:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 // CHECK17-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !24 // CHECK17-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !24 // CHECK17-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @@ -5815,8 +5982,8 @@ // CHECK17-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i64 0, i64 0 // CHECK17-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP18]], i64 0, i64 0 // CHECK17-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i64 0, i64 0 -// CHECK17-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP12]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP12]], i32 0, i32 2 // CHECK17-NEXT: [[TMP25:%.*]] = call i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l138.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* [[TMP22]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0, i32 0, i8* null, i32 0, i8* null) #[[ATTR4]] // CHECK17-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK17-NEXT: br i1 [[TMP26]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__3_EXIT:%.*]] @@ -5844,31 +6011,29 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK17-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP3]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK17-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK17-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5878,62 +6043,66 @@ // CHECK17-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK17-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK17-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 3, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] -// CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK17-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK17-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK17-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 -// CHECK17-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK17-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK17-NEXT: store i16 [[CONV]], i16* [[IT]], align 2 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[A]], align 4 +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK17-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK17-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 +// CHECK17-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 +// CHECK17-NEXT: store i16 [[CONV5]], i16* [[AA]], align 2 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) // CHECK17-NEXT: ret void // // @@ -5950,8 +6119,7 @@ // CHECK17-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 // CHECK17-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK17-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -5972,33 +6140,40 @@ // CHECK17-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK17-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 -// CHECK17-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 10, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, [10 x float]*, i64, float*, [5 x [10 x double]]*, i64, i64, double*, %struct.TT*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP9]], [10 x float]* [[TMP0]], i64 [[TMP1]], float* [[TMP2]], [5 x [10 x double]]* [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], double* [[TMP6]], %struct.TT* [[TMP7]], i64 [[TMP11]]) +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store [10 x float]* [[TMP0]], [10 x float]** [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP1]], i64* [[TMP11]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store float* [[TMP2]], float** [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: store [5 x [10 x double]]* [[TMP3]], [5 x [10 x double]]** [[TMP13]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK17-NEXT: store i64 [[TMP5]], i64* [[TMP15]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK17-NEXT: store double* [[TMP6]], double** [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK17-NEXT: store %struct.TT* [[TMP7]], %struct.TT** [[TMP17]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 10 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV5]], align 4 +// CHECK17-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8 -// CHECK17-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 -// CHECK17-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6006,126 +6181,130 @@ // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[IT:%.*]] = alloca i8, align 1 -// CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK17-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8 -// CHECK17-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8 -// CHECK17-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8 -// CHECK17-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP4:%.*]] = load [10 x float]*, [10 x float]** [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP10:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP11]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 7 +// CHECK17-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP13]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 8 +// CHECK17-NEXT: [[TMP16:%.*]] = load double*, double** [[TMP15]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 9 +// CHECK17-NEXT: [[TMP18:%.*]] = load %struct.TT*, %struct.TT** [[TMP17]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 10 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 8 +// CHECK17-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] -// CHECK17-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 -// CHECK17-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK17-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double -// CHECK17-NEXT: [[ADD10:%.*]] = fadd double [[CONV9]], 1.000000e+00 -// CHECK17-NEXT: [[CONV11:%.*]] = fptrunc double [[ADD10]] to float -// CHECK17-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4 -// CHECK17-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK17-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4 -// CHECK17-NEXT: [[CONV13:%.*]] = fpext float [[TMP21]] to double -// CHECK17-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.000000e+00 -// CHECK17-NEXT: [[CONV15:%.*]] = fptrunc double [[ADD14]] to float -// CHECK17-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4 -// CHECK17-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 -// CHECK17-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX16]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8 -// CHECK17-NEXT: [[ADD18:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK17-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8 -// CHECK17-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK17-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP23]] -// CHECK17-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX19]], i64 3 -// CHECK17-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8 -// CHECK17-NEXT: [[ADD21:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK17-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8 -// CHECK17-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8 -// CHECK17-NEXT: [[ADD22:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK17-NEXT: store i64 [[ADD22]], i64* [[X]], align 8 -// CHECK17-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8 -// CHECK17-NEXT: [[CONV23:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK17-NEXT: [[ADD24:%.*]] = add nsw i32 [[CONV23]], 1 -// CHECK17-NEXT: [[CONV25:%.*]] = trunc i32 [[ADD24]] to i8 -// CHECK17-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 +// CHECK17-NEXT: store i8 [[CONV]], i8* [[IT]], align 1 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, i32* [[A]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i64 0, i64 2 +// CHECK17-NEXT: [[TMP33:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK17-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK17-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK17-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK17-NEXT: store float [[CONV5]], float* [[ARRAYIDX]], align 4 +// CHECK17-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 3 +// CHECK17-NEXT: [[TMP34:%.*]] = load float, float* [[ARRAYIDX6]], align 4 +// CHECK17-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double +// CHECK17-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 +// CHECK17-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float +// CHECK17-NEXT: store float [[CONV9]], float* [[ARRAYIDX6]], align 4 +// CHECK17-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP10]], i64 0, i64 1 +// CHECK17-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX10]], i64 0, i64 2 +// CHECK17-NEXT: [[TMP35:%.*]] = load double, double* [[ARRAYIDX11]], align 8 +// CHECK17-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK17-NEXT: store double [[ADD12]], double* [[ARRAYIDX11]], align 8 +// CHECK17-NEXT: [[TMP36:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK17-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP16]], i64 [[TMP36]] +// CHECK17-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX13]], i64 3 +// CHECK17-NEXT: [[TMP37:%.*]] = load double, double* [[ARRAYIDX14]], align 8 +// CHECK17-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK17-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8 +// CHECK17-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP18]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP38:%.*]] = load i64, i64* [[X]], align 8 +// CHECK17-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK17-NEXT: store i64 [[ADD16]], i64* [[X]], align 8 +// CHECK17-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP18]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP39:%.*]] = load i8, i8* [[Y]], align 8 +// CHECK17-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK17-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK17-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK17-NEXT: store i8 [[CONV19]], i8* [[Y]], align 8 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK17-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK17-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: -// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD27:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK17-NEXT: store i32 [[ADD27]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK17-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK17-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK17-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) // CHECK17-NEXT: ret void // // @@ -6430,7 +6609,7 @@ // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 -// CHECK17-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK17-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK17-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -6441,24 +6620,28 @@ // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK17-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]]) +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store %struct.S1* [[TMP0]], %struct.S1** [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK17-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i64 [[TMP1]], i64* [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP2]], i64* [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i16* [[TMP3]], i16** [[TMP9]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 +// CHECK17-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -6468,73 +6651,76 @@ // CHECK17-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK17-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i16*, i16** [[TMP9]], align 8 // CHECK17-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK17-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK17-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP6]], 3 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP13]], 3 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK17-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK17-NEXT: store i64 [[TMP8]], i64* [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP15]], i64* [[DOTOMP_IV]], align 8 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] -// CHECK17-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]] +// CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[MUL:%.*]] = mul i64 [[TMP18]], 400 // CHECK17-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK17-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double -// CHECK17-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 -// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[B]], align 4 +// CHECK17-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double +// CHECK17-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK17-NEXT: store double [[ADD]], double* [[A]], align 8 -// CHECK17-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP13:%.*]] = load double, double* [[A5]], align 8 -// CHECK17-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK17-NEXT: store double [[INC]], double* [[A5]], align 8 -// CHECK17-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 -// CHECK17-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP14]] -// CHECK17-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 -// CHECK17-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2 +// CHECK17-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP20:%.*]] = load double, double* [[A2]], align 8 +// CHECK17-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK17-NEXT: store double [[INC]], double* [[A2]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = fptosi double [[INC]] to i16 +// CHECK17-NEXT: [[TMP21:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i64 [[TMP21]] +// CHECK17-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// CHECK17-NEXT: store i16 [[CONV3]], i16* [[ARRAYIDX4]], align 2 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[ADD8:%.*]] = add i64 [[TMP15]], 1 -// CHECK17-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[ADD5:%.*]] = add i64 [[TMP22]], 1 +// CHECK17-NEXT: store i64 [[ADD5]], i64* [[DOTOMP_IV]], align 8 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) // CHECK17-NEXT: ret void // // @@ -6545,9 +6731,7 @@ // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 @@ -6556,43 +6740,47 @@ // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK17-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 -// CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* -// CHECK17-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK17-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV2]], align 1 +// CHECK17-NEXT: store i8 [[TMP6]], i8* [[TMP5]], align 2 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP7]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK17-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK17-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 2 +// CHECK17-NEXT: store i8 [[TMP6]], i8* [[AAA]], align 1 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP7]], align 8 // CHECK17-NEXT: ret void // // @@ -6602,34 +6790,33 @@ // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK17-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK17-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP5]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -6639,67 +6826,71 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK17-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK17-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK17-NEXT: store i64 6, i64* [[DOTOMP_UB]], align 8 // CHECK17-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK17-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK17-NEXT: store i64 [[TMP5]], i64* [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_IV]], align 8 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] -// CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK17-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK17-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK17-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK17-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 -// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 -// CHECK17-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK17-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK17-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4 +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK17-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK17-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 2 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i64 [[TMP12]], 1 -// CHECK17-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 +// CHECK17-NEXT: store i64 [[ADD6]], i64* [[DOTOMP_IV]], align 8 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK17-NEXT: ret void // // @@ -6737,7 +6928,7 @@ // CHECK19-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4 // CHECK19-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4 // CHECK19-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4 -// CHECK19-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK19-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK19-NEXT: [[A_CASTED3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[AA_CASTED4:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOFFLOAD_BASEPTRS6:%.*]] = alloca [2 x i8*], align 4 @@ -6813,13 +7004,13 @@ // CHECK19-NEXT: store i8* null, i8** [[TMP29]], align 4 // CHECK19-NEXT: [[TMP30:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK19-NEXT: [[TMP31:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 0 // CHECK19-NEXT: [[TMP33:%.*]] = load i16, i16* [[AA]], align 2 // CHECK19-NEXT: store i16 [[TMP33]], i16* [[TMP32]], align 4 -// CHECK19-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 1 // CHECK19-NEXT: [[TMP35:%.*]] = load i32, i32* [[LIN]], align 4 // CHECK19-NEXT: store i32 [[TMP35]], i32* [[TMP34]], align 4 -// CHECK19-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 2 // CHECK19-NEXT: [[TMP37:%.*]] = load i32, i32* [[A]], align 4 // CHECK19-NEXT: store i32 [[TMP37]], i32* [[TMP36]], align 4 // CHECK19-NEXT: [[TMP38:%.*]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 72, i32 12, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*), i64 -1) @@ -6827,10 +7018,10 @@ // CHECK19-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP39]], i32 0, i32 0 // CHECK19-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP40]], i32 0, i32 0 // CHECK19-NEXT: [[TMP42:%.*]] = load i8*, i8** [[TMP41]], align 4 -// CHECK19-NEXT: [[TMP43:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* +// CHECK19-NEXT: [[TMP43:%.*]] = bitcast %struct.anon.2* [[AGG_CAPTURED]] to i8* // CHECK19-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP42]], i8* align 4 [[TMP43]], i32 12, i1 false) // CHECK19-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP39]], i32 0, i32 1 -// CHECK19-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP42]] to %struct.anon* +// CHECK19-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP42]] to %struct.anon.2* // CHECK19-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP44]], i32 0, i32 0 // CHECK19-NEXT: [[TMP47:%.*]] = bitcast [3 x i64]* [[TMP46]] to i8* // CHECK19-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP47]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false) @@ -7014,15 +7205,17 @@ // CHECK19-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103 // CHECK19-SAME: () #[[ATTR2:[0-9]+]] { // CHECK19-NEXT: entry: -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7032,46 +7225,48 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 5, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK19-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK19-NEXT: br i1 [[TMP9]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK19-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK19-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK19-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK19: .cancel.exit: // CHECK19-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK19: .cancel.continue: -// CHECK19-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK19-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK19-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] +// CHECK19-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK19-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK19-NEXT: br i1 [[TMP12]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] // CHECK19: .cancel.exit2: // CHECK19-NEXT: br label [[CANCEL_EXIT]] // CHECK19: .cancel.continue3: @@ -7079,19 +7274,19 @@ // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK19-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK19-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK19: cancel.cont: // CHECK19-NEXT: ret void // CHECK19: cancel.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK19-NEXT: br label [[CANCEL_CONT]] // // @@ -7100,24 +7295,26 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[K_ADDR:%.*]] = alloca i64*, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i64* [[K]], i64** [[K_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i64*, i64** [[K_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i64*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP2]], i64* [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i64* [[TMP0]], i64** [[TMP3]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i64* noundef nonnull align 4 dereferenceable(8) [[K:%.*]]) #[[ATTR3]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[K_ADDR:%.*]] = alloca i64*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTLINEAR_START:%.*]] = alloca i64, align 8 @@ -7126,69 +7323,73 @@ // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[K1:%.*]] = alloca i64, align 8 +// CHECK19-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: store i64* [[K]], i64** [[K_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i64*, i64** [[K_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 8 -// CHECK19-NEXT: store i64 [[TMP1]], i64* [[DOTLINEAR_START]], align 8 +// CHECK19-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK19-NEXT: store i64 [[TMP5]], i64* [[DOTLINEAR_START]], align 8 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 8, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK19-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP3]]) -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 1073741859, i32 0, i32 8, i32 1, i32 1) +// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK19-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP7]]) +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32 1073741859, i32 0, i32 8, i32 1, i32 1) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP4]], 0 +// CHECK19-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] // CHECK19-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK19-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !13 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK19-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP10]], 3 -// CHECK19-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] to i64 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP9]], [[CONV]] -// CHECK19-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !13 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !13 -// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK19-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !13 +// CHECK19-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !13 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK19-NEXT: [[MUL1:%.*]] = mul nsw i32 [[TMP14]], 3 +// CHECK19-NEXT: [[CONV:%.*]] = sext i32 [[MUL1]] to i64 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP13]], [[CONV]] +// CHECK19-NEXT: store i64 [[ADD]], i64* [[K]], align 8, !llvm.access.group !13 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !13 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK19-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !13 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK19-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK19-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK19-NEXT: br i1 [[TMP14]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK19-NEXT: br i1 [[TMP18]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK19: .omp.linear.pu: -// CHECK19-NEXT: [[TMP15:%.*]] = load i64, i64* [[K1]], align 8 -// CHECK19-NEXT: store i64 [[TMP15]], i64* [[TMP0]], align 8 +// CHECK19-NEXT: [[TMP19:%.*]] = load i64, i64* [[K]], align 8 +// CHECK19-NEXT: store i64 [[TMP19]], i64* [[TMP4]], align 8 // CHECK19-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK19: .omp.linear.pu.done: // CHECK19-NEXT: ret void @@ -7200,35 +7401,33 @@ // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[LIN_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK19-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK19-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[LIN_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[LIN_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[LIN_CASTED]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32 [[TMP1]], i32 [[TMP3]], i32 [[TMP5]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK19-NEXT: store i16 [[TMP1]], i16* [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[LIN_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[LIN:%.*]], i32 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK19-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK19-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK19-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -7243,89 +7442,96 @@ // CHECK19-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[LIN_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START1]], align 4 +// CHECK19-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 4 +// CHECK19-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[LIN]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], i32* [[A]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[LIN]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTLINEAR_START]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTLINEAR_START1]], align 4 // CHECK19-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() // CHECK19-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 // CHECK19-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK19-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK19-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK19-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) -// CHECK19-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK19-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]]) +// CHECK19-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK19-NEXT: store i64 [[TMP6]], i64* [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK19-NEXT: store i64 [[TMP13]], i64* [[DOTOMP_IV]], align 8 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK19-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK19-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK19-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 -// CHECK19-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK19-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 -// CHECK19-NEXT: [[MUL6:%.*]] = mul i64 [[TMP11]], [[TMP12]] -// CHECK19-NEXT: [[ADD:%.*]] = add i64 [[CONV5]], [[MUL6]] -// CHECK19-NEXT: [[CONV7:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK19-NEXT: store i32 [[CONV7]], i32* [[LIN2]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4 -// CHECK19-NEXT: [[CONV8:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK19-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 -// CHECK19-NEXT: [[MUL9:%.*]] = mul i64 [[TMP14]], [[TMP15]] -// CHECK19-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] -// CHECK19-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 -// CHECK19-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK19-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK19-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 -// CHECK19-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK19-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 +// CHECK19-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK19-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK19-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] +// CHECK19-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK19-NEXT: store i32 [[CONV6]], i32* [[LIN2]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4 +// CHECK19-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK19-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK19-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] +// CHECK19-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] +// CHECK19-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 +// CHECK19-NEXT: store i32 [[CONV10]], i32* [[A3]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK19-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 +// CHECK19-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 +// CHECK19-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 +// CHECK19-NEXT: store i16 [[CONV13]], i16* [[AA]], align 2 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[ADD15:%.*]] = add i64 [[TMP17]], 1 -// CHECK19-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 +// CHECK19-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_IV]], align 8 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK19-NEXT: br i1 [[TMP19]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK19-NEXT: br i1 [[TMP26]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK19: .omp.linear.pu: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN2]], align 4 -// CHECK19-NEXT: store i32 [[TMP20]], i32* [[LIN_ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[A3]], align 4 -// CHECK19-NEXT: store i32 [[TMP21]], i32* [[A_ADDR]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[LIN2]], align 4 +// CHECK19-NEXT: store i32 [[TMP27]], i32* [[LIN]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[A3]], align 4 +// CHECK19-NEXT: store i32 [[TMP28]], i32* [[A]], align 4 // CHECK19-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK19: .omp.linear.pu.done: // CHECK19-NEXT: ret void @@ -7368,7 +7574,7 @@ // CHECK19-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 4 // CHECK19-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 4 // CHECK19-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 4 -// CHECK19-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.2*, align 4 // CHECK19-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i16*, align 4 // CHECK19-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca [3 x i8*]*, align 4 // CHECK19-NEXT: [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca [3 x i8*]*, align 4 @@ -7386,7 +7592,7 @@ // CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK19-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK19-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.2* // CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK19-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK19-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -7399,8 +7605,8 @@ // CHECK19-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !25 // CHECK19-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !25 // CHECK19-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !25 -// CHECK19-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !25 -// CHECK19-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !25 +// CHECK19-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 4, !noalias !25 +// CHECK19-NEXT: [[TMP12:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 4, !noalias !25 // CHECK19-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !25 // CHECK19-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !25 // CHECK19-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @@ -7412,8 +7618,8 @@ // CHECK19-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i32 0, i32 0 // CHECK19-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP18]], i32 0, i32 0 // CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 -// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP12]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP12]], i32 0, i32 2 // CHECK19-NEXT: [[TMP25:%.*]] = call i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l138.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* [[TMP22]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0, i32 0, i8* null, i32 0, i8* null) #[[ATTR4]] // CHECK19-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK19-NEXT: br i1 [[TMP26]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__3_EXIT:%.*]] @@ -7439,29 +7645,28 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK19-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32 [[TMP1]], i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK19-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7471,61 +7676,66 @@ // CHECK19-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK19-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK19-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 3, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] -// CHECK19-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK19-NEXT: store i16 [[CONV2]], i16* [[IT]], align 2 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK19-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK19-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 -// CHECK19-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK19-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2 +// CHECK19-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK19-NEXT: store i16 [[CONV]], i16* [[IT]], align 2 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[A]], align 4 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK19-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK19-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 +// CHECK19-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 +// CHECK19-NEXT: store i16 [[CONV5]], i16* [[AA]], align 2 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK19-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK19-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) // CHECK19-NEXT: ret void // // @@ -7542,8 +7752,7 @@ // CHECK19-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 // CHECK19-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -7562,31 +7771,40 @@ // CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 // CHECK19-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 // CHECK19-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 10, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [10 x float]*, i32, float*, [5 x [10 x double]]*, i32, i32, double*, %struct.TT*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP9]], [10 x float]* [[TMP0]], i32 [[TMP1]], float* [[TMP2]], [5 x [10 x double]]* [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], double* [[TMP6]], %struct.TT* [[TMP7]], i32 [[TMP11]]) +// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store [10 x float]* [[TMP0]], [10 x float]** [[TMP10]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32 [[TMP1]], i32* [[TMP11]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store float* [[TMP2]], float** [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store [5 x [10 x double]]* [[TMP3]], [5 x [10 x double]]** [[TMP13]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[TMP14]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK19-NEXT: store i32 [[TMP5]], i32* [[TMP15]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK19-NEXT: store double* [[TMP6]], double** [[TMP16]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK19-NEXT: store %struct.TT* [[TMP7]], %struct.TT** [[TMP17]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4 -// CHECK19-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 -// CHECK19-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7596,122 +7814,128 @@ // CHECK19-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4 -// CHECK19-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4 -// CHECK19-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4 -// CHECK19-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load [10 x float]*, [10 x float]** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 6 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 7 +// CHECK19-NEXT: [[TMP16:%.*]] = load double*, double** [[TMP15]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 8 +// CHECK19-NEXT: [[TMP18:%.*]] = load %struct.TT*, %struct.TT** [[TMP17]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 9 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK19-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 // CHECK19-NEXT: store i8 [[CONV]], i8* [[IT]], align 1 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK19-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK19-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, i32* [[A]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP33:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK19-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK19-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK19-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK19-NEXT: store float [[CONV5]], float* [[ARRAYIDX]], align 4 +// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP8]], i32 3 +// CHECK19-NEXT: [[TMP34:%.*]] = load float, float* [[ARRAYIDX6]], align 4 +// CHECK19-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double // CHECK19-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK19-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK19-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4 -// CHECK19-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK19-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4 -// CHECK19-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK19-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK19-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK19-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4 -// CHECK19-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 -// CHECK19-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8 -// CHECK19-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK19-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8 -// CHECK19-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK19-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP23]] -// CHECK19-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i32 3 -// CHECK19-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8 -// CHECK19-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK19-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8 -// CHECK19-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4 -// CHECK19-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK19-NEXT: store i64 [[ADD20]], i64* [[X]], align 4 -// CHECK19-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK19-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4 -// CHECK19-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK19-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK19-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK19-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4 +// CHECK19-NEXT: store float [[CONV9]], float* [[ARRAYIDX6]], align 4 +// CHECK19-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP10]], i32 0, i32 1 +// CHECK19-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX10]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP35:%.*]] = load double, double* [[ARRAYIDX11]], align 8 +// CHECK19-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK19-NEXT: store double [[ADD12]], double* [[ARRAYIDX11]], align 8 +// CHECK19-NEXT: [[TMP36:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK19-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP16]], i32 [[TMP36]] +// CHECK19-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX13]], i32 3 +// CHECK19-NEXT: [[TMP37:%.*]] = load double, double* [[ARRAYIDX14]], align 8 +// CHECK19-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK19-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8 +// CHECK19-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP18]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP38:%.*]] = load i64, i64* [[X]], align 4 +// CHECK19-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK19-NEXT: store i64 [[ADD16]], i64* [[X]], align 4 +// CHECK19-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP18]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP39:%.*]] = load i8, i8* [[Y]], align 4 +// CHECK19-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK19-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK19-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK19-NEXT: store i8 [[CONV19]], i8* [[Y]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK19-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK19-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: -// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK19-NEXT: store i32 [[ADD25]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK19-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK19-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK19-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) // CHECK19-NEXT: ret void // // @@ -8013,7 +8237,7 @@ // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 -// CHECK19-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK19-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK19-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -8023,23 +8247,28 @@ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 // CHECK19-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]]) +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store %struct.S1* [[TMP0]], %struct.S1** [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32 [[TMP1]], i32* [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store i16* [[TMP3]], i16** [[TMP9]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 +// CHECK19-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK19-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -8049,72 +8278,76 @@ // CHECK19-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i16*, i16** [[TMP9]], align 4 // CHECK19-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK19-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK19-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK19-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP6]], 3 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP13]], 3 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK19-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK19-NEXT: store i64 [[TMP8]], i64* [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK19-NEXT: store i64 [[TMP15]], i64* [[DOTOMP_IV]], align 8 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] -// CHECK19-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]] +// CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 +// CHECK19-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[MUL:%.*]] = mul i64 [[TMP18]], 400 // CHECK19-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK19-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK19-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[B]], align 4 +// CHECK19-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double // CHECK19-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK19-NEXT: store double [[ADD]], double* [[A]], align 4 -// CHECK19-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP13:%.*]] = load double, double* [[A4]], align 4 -// CHECK19-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK19-NEXT: store double [[INC]], double* [[A4]], align 4 -// CHECK19-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK19-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP14]] -// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 -// CHECK19-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2 +// CHECK19-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP20:%.*]] = load double, double* [[A2]], align 4 +// CHECK19-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK19-NEXT: store double [[INC]], double* [[A2]], align 4 +// CHECK19-NEXT: [[CONV3:%.*]] = fptosi double [[INC]] to i16 +// CHECK19-NEXT: [[TMP21:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i32 [[TMP21]] +// CHECK19-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// CHECK19-NEXT: store i16 [[CONV3]], i16* [[ARRAYIDX4]], align 2 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[ADD7:%.*]] = add i64 [[TMP15]], 1 -// CHECK19-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[ADD5:%.*]] = add i64 [[TMP22]], 1 +// CHECK19-NEXT: store i64 [[ADD5]], i64* [[DOTOMP_IV]], align 8 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) // CHECK19-NEXT: ret void // // @@ -8125,9 +8358,7 @@ // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK19-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 @@ -8135,41 +8366,47 @@ // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* // CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK19-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK19-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 -// CHECK19-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* -// CHECK19-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK19-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV1]], align 1 +// CHECK19-NEXT: store i8 [[TMP6]], i8* [[TMP5]], align 2 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP7]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK19-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK19-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 2 +// CHECK19-NEXT: store i8 [[TMP6]], i8* [[AAA]], align 1 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP7]], align 4 // CHECK19-NEXT: ret void // // @@ -8179,32 +8416,32 @@ // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK19-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK19-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK19-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP5]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK19-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -8214,66 +8451,71 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK19-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK19-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK19-NEXT: store i64 6, i64* [[DOTOMP_UB]], align 8 // CHECK19-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK19-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK19-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK19-NEXT: store i64 [[TMP5]], i64* [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK19-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_IV]], align 8 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK19-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK19-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK19-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK19-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 -// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 -// CHECK19-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK19-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK19-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK19-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK19-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK19-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i64 [[TMP12]], 1 -// CHECK19-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 +// CHECK19-NEXT: store i64 [[ADD6]], i64* [[DOTOMP_IV]], align 8 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK19-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp b/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp --- a/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp @@ -65,8 +65,8 @@ // CHECK1-NEXT: [[TMP:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca [10 x [10 x i32]]*, align 8 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK1-NEXT: store [10 x [10 x [10 x i32]]] addrspace(1)* [[C]], [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8 // CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], metadata [[META31:![0-9]+]], metadata !DIExpression()), !dbg [[DBG32:![0-9]+]] // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 @@ -94,239 +94,190 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG41]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG42:![0-9]+]] -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*, !dbg [[DBG42]] -// CHECK1-NEXT: store i32 [[TMP10]], i32* [[CONV]], align 4, !dbg [[DBG42]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[A_CASTED]], align 8, !dbg [[DBG42]] -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG42]] -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast [10 x [10 x [10 x i32]]]* [[TMP2]] to i8*, !dbg [[DBG42]] -// CHECK1-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !dbg [[DBG42]] -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG42]] -// CHECK1-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP11]] to i8*, !dbg [[DBG42]] -// CHECK1-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 8, !dbg [[DBG42]] -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG42]] -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast [10 x [10 x i32]]* [[TMP4]] to i8*, !dbg [[DBG42]] -// CHECK1-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8, !dbg [[DBG42]] -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG42]] -// CHECK1-NEXT: store i8* [[TMP7]], i8** [[TMP18]], align 8, !dbg [[DBG42]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR__ADDR]], align 1, !dbg [[DBG43:![0-9]+]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP19]] to i1, !dbg [[DBG43]] -// CHECK1-NEXT: [[TMP20:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG42]] -// CHECK1-NEXT: [[TMP21:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**, !dbg [[DBG42]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB6]], i32 [[TMP9]], i32 [[TMP20]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, [10 x [10 x [10 x i32]]]*, i64, [10 x [10 x i32]]*, i8*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP21]], i64 4), !dbg [[DBG42]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB8:[0-9]+]], i8 2, i1 false), !dbg [[DBG45:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG46:![0-9]+]] +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG42:![0-9]+]] +// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[TMP2]], [10 x [10 x [10 x i32]]]** [[TMP10]], align 8, !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG43:![0-9]+]] +// CHECK1-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 8, !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG42]] +// CHECK1-NEXT: store [10 x [10 x i32]]* [[TMP4]], [10 x [10 x i32]]** [[TMP13]], align 8, !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG42]] +// CHECK1-NEXT: store i8* [[TMP7]], i8** [[TMP14]], align 8, !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast %struct.anon* [[DOTTMP_OUTLINED_AGG_ARG]] to i8*, !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP16:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 32), !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP17:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP15]], i8* [[TMP16]]), !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP18:%.*]] = load [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], align 8, !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP17]] to %struct.anon*, !dbg [[DBG42]] +// CHECK1-NEXT: store [[STRUCT_ANON]] [[TMP18]], %struct.anon* [[TMP19]], align 8, !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR__ADDR]], align 1, !dbg [[DBG45:![0-9]+]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP20]] to i1, !dbg [[DBG45]] +// CHECK1-NEXT: [[TMP21:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG42]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB6]], i32 [[TMP9]], i32 [[TMP21]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon*)* @__omp_outlined__ to i8*), i8* null, i8* [[TMP17]]), !dbg [[DBG42]] +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP16]], i64 32), !dbg [[DBG42]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB8:[0-9]+]], i8 2, i1 false), !dbg [[DBG46:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG47:![0-9]+]] // CHECK1: worker.exit: // CHECK1-NEXT: ret void, !dbg [[DBG41]] // // -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined___debug__ -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [10 x [10 x i32]]] addrspace(1)* noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], [10 x [10 x i32]]* noalias noundef [[B:%.*]], i8 addrspace(1)* noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG47:![0-9]+]] { +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] !dbg [[DBG48:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]] addrspace(1)*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca i8 addrspace(1)*, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK1-NEXT: [[_TMP2:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[B4:%.*]] = alloca [10 x [10 x i32]], align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca [10 x [10 x i32]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[F:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[G:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[H:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META54:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META59:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60:![0-9]+]] // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META56:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] -// CHECK1-NEXT: store [10 x [10 x [10 x i32]]] addrspace(1)* [[C]], [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], metadata [[META57:![0-9]+]], metadata !DIExpression()), !dbg [[DBG58:![0-9]+]] -// CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META59:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60:![0-9]+]] -// CHECK1-NEXT: store [10 x [10 x i32]]* [[B]], [10 x [10 x i32]]** [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]]** [[B_ADDR]], metadata [[META61:![0-9]+]], metadata !DIExpression()), !dbg [[DBG62:![0-9]+]] -// CHECK1-NEXT: store i8 addrspace(1)* [[BB]], i8 addrspace(1)** [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8 addrspace(1)** [[BB_ADDR]], metadata [[META63:![0-9]+]], metadata !DIExpression()), !dbg [[DBG64:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]] addrspace(1)*, [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8, !dbg [[DBG65:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* [[TMP0]] to [10 x [10 x [10 x i32]]]*, !dbg [[DBG65]] -// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[TMP1]], [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG65]] -// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG65]] -// CHECK1-NEXT: [[TMP3:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG65]] -// CHECK1-NEXT: store [10 x [10 x i32]]* [[TMP3]], [10 x [10 x i32]]** [[_TMP1]], align 8, !dbg [[DBG65]] -// CHECK1-NEXT: [[TMP4:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[_TMP1]], align 8, !dbg [[DBG65]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)** [[BB_ADDR]], align 8, !dbg [[DBG65]] -// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast i8 addrspace(1)* [[TMP5]] to i8*, !dbg [[DBG65]] -// CHECK1-NEXT: store i8* [[TMP6]], i8** [[_TMP2]], align 8, !dbg [[DBG65]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[_TMP2]], align 8, !dbg [[DBG65]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_IV]], metadata [[META66:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_LB]], metadata [[META67:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG68:![0-9]+]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_UB]], metadata [[META69:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] -// CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_STRIDE]], metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] -// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_IS_LAST]], metadata [[META71:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]]* [[B4]], metadata [[META72:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast [10 x [10 x i32]]* [[B4]] to i8*, !dbg [[DBG65]] -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [10 x [10 x i32]]* [[TMP4]] to i8*, !dbg [[DBG65]] -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 400, i1 false), !dbg [[DBG65]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[I]], metadata [[META73:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG65]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4, !dbg [[DBG65]] -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP11]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG74:![0-9]+]] -// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG65]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META61:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60]] +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata %struct.anon** [[__CONTEXT_ADDR]], metadata [[META62:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60]] +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8, !dbg [[DBG63:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0, !dbg [[DBG63]] +// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[TMP1]], align 8, !dbg [[DBG63]] +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1, !dbg [[DBG63]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8, !dbg [[DBG63]] +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[A]], align 4, !dbg [[DBG63]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2, !dbg [[DBG63]] +// CHECK1-NEXT: [[TMP6:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[TMP5]], align 8, !dbg [[DBG63]] +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3, !dbg [[DBG63]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[TMP7]], align 8, !dbg [[DBG63]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_IV]], metadata [[META64:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_LB]], metadata [[META65:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60]] +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG66:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_UB]], metadata [[META67:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60]] +// CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_STRIDE]], metadata [[META68:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60]] +// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_IS_LAST]], metadata [[META69:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60]] +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]]* [[B]], metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60]] +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [10 x [10 x i32]]* [[B]] to i8*, !dbg [[DBG63]] +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast [10 x [10 x i32]]* [[TMP6]] to i8*, !dbg [[DBG63]] +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP9]], i8* align 4 [[TMP10]], i64 400, i1 false), !dbg [[DBG63]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[I]], metadata [[META71:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG63]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4, !dbg [[DBG63]] +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG63]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG63]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 9, !dbg [[DBG68]] -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9, !dbg [[DBG66]] +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG66]] // CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG68]] +// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG66]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG66]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ], !dbg [[DBG68]] -// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]], !dbg [[DBG65]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG65]] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ], !dbg [[DBG66]] +// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]], !dbg [[DBG72:![0-9]+]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG63]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG65]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG63]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]], !dbg [[DBG65]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG65]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]], !dbg [[DBG72]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG63]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1, !dbg [[DBG75:![0-9]+]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG75]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !dbg [[DBG75]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[F]], metadata [[META76:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 1, !dbg [[DBG80:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG80]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX7]], i64 0, i64 1, !dbg [[DBG80]] -// CHECK1-NEXT: store i32* [[ARRAYIDX8]], i32** [[F]], align 8, !dbg [[DBG79]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[G]], metadata [[META81:![0-9]+]], metadata !DIExpression()), !dbg [[DBG82:![0-9]+]] -// CHECK1-NEXT: store i32* [[A_ADDR]], i32** [[G]], align 8, !dbg [[DBG82]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[H]], metadata [[META83:![0-9]+]], metadata !DIExpression()), !dbg [[DBG84:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[B4]], i64 0, i64 1, !dbg [[DBG85:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX9]], i64 0, i64 1, !dbg [[DBG85]] -// CHECK1-NEXT: store i32* [[ARRAYIDX10]], i32** [[H]], align 8, !dbg [[DBG84]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[D]], metadata [[META86:![0-9]+]], metadata !DIExpression()), !dbg [[DBG87:![0-9]+]] -// CHECK1-NEXT: store i32 15, i32* [[D]], align 4, !dbg [[DBG87]] -// CHECK1-NEXT: store i32 5, i32* [[A_ADDR]], align 4, !dbg [[DBG88:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[B4]], i64 0, i64 0, !dbg [[DBG89:![0-9]+]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG90:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG89]] -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX11]], i64 0, i64 [[IDXPROM]], !dbg [[DBG89]] -// CHECK1-NEXT: store i32 10, i32* [[ARRAYIDX12]], align 4, !dbg [[DBG91:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 0, !dbg [[DBG92:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG92]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG93:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP21]] to i64, !dbg [[DBG92]] -// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG92]] -// CHECK1-NEXT: store i32 11, i32* [[ARRAYIDX16]], align 4, !dbg [[DBG94:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 0, !dbg [[DBG95:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX17]], i64 0, i64 0, !dbg [[DBG95]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG96:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG95]] -// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG95]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX20]], align 4, !dbg [[DBG95]] -// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[B4]], i64 0, i64 0, !dbg [[DBG97:![0-9]+]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG98:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP24]] to i64, !dbg [[DBG97]] -// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG97]] -// CHECK1-NEXT: store i32 [[TMP23]], i32* [[ARRAYIDX23]], align 4, !dbg [[DBG99:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[B4]], i64 0, i64 0, !dbg [[DBG100:![0-9]+]] -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG101:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM25:%.*]] = sext i32 [[TMP25]] to i64, !dbg [[DBG100]] -// CHECK1-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX24]], i64 0, i64 [[IDXPROM25]], !dbg [[DBG100]] -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[ARRAYIDX26]], align 4, !dbg [[DBG100]] -// CHECK1-NEXT: [[TMP27:%.*]] = load i8, i8* [[TMP7]], align 1, !dbg [[DBG102:![0-9]+]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP27]] to i1, !dbg [[DBG102]] -// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG102]] -// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP26]], !dbg [[DBG102]] -// CHECK1-NEXT: [[TOBOOL27:%.*]] = icmp ne i32 [[OR]], 0, !dbg [[DBG102]] -// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL27]] to i8, !dbg [[DBG102]] -// CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[TMP7]], align 1, !dbg [[DBG102]] -// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG103:![0-9]+]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1, !dbg [[DBG73:![0-9]+]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG73]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !dbg [[DBG73]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[F]], metadata [[META74:![0-9]+]], metadata !DIExpression()), !dbg [[DBG77:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 1, !dbg [[DBG78:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG78]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX3]], i64 0, i64 1, !dbg [[DBG78]] +// CHECK1-NEXT: store i32* [[ARRAYIDX4]], i32** [[F]], align 8, !dbg [[DBG77]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[G]], metadata [[META79:![0-9]+]], metadata !DIExpression()), !dbg [[DBG80:![0-9]+]] +// CHECK1-NEXT: store i32* [[A]], i32** [[G]], align 8, !dbg [[DBG80]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[H]], metadata [[META81:![0-9]+]], metadata !DIExpression()), !dbg [[DBG82:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[B]], i64 0, i64 1, !dbg [[DBG83:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX5]], i64 0, i64 1, !dbg [[DBG83]] +// CHECK1-NEXT: store i32* [[ARRAYIDX6]], i32** [[H]], align 8, !dbg [[DBG82]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[D]], metadata [[META84:![0-9]+]], metadata !DIExpression()), !dbg [[DBG85:![0-9]+]] +// CHECK1-NEXT: store i32 15, i32* [[D]], align 4, !dbg [[DBG85]] +// CHECK1-NEXT: store i32 5, i32* [[A]], align 4, !dbg [[DBG86:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[B]], i64 0, i64 0, !dbg [[DBG87:![0-9]+]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG88:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64, !dbg [[DBG87]] +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX7]], i64 0, i64 [[IDXPROM]], !dbg [[DBG87]] +// CHECK1-NEXT: store i32 10, i32* [[ARRAYIDX8]], align 4, !dbg [[DBG89:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 0, !dbg [[DBG90:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX9]], i64 0, i64 0, !dbg [[DBG90]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG91:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG90]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX10]], i64 0, i64 [[IDXPROM11]], !dbg [[DBG90]] +// CHECK1-NEXT: store i32 11, i32* [[ARRAYIDX12]], align 4, !dbg [[DBG92:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 0, !dbg [[DBG93:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG93]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG94:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG93]] +// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG93]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[ARRAYIDX16]], align 4, !dbg [[DBG93]] +// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[B]], i64 0, i64 0, !dbg [[DBG95:![0-9]+]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG96:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM18:%.*]] = sext i32 [[TMP25]] to i64, !dbg [[DBG95]] +// CHECK1-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX17]], i64 0, i64 [[IDXPROM18]], !dbg [[DBG95]] +// CHECK1-NEXT: store i32 [[TMP24]], i32* [[ARRAYIDX19]], align 4, !dbg [[DBG97:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[B]], i64 0, i64 0, !dbg [[DBG98:![0-9]+]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG99:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM21:%.*]] = sext i32 [[TMP26]] to i64, !dbg [[DBG98]] +// CHECK1-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX20]], i64 0, i64 [[IDXPROM21]], !dbg [[DBG98]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX22]], align 4, !dbg [[DBG98]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i8, i8* [[TMP8]], align 1, !dbg [[DBG100:![0-9]+]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP28]] to i1, !dbg [[DBG100]] +// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG100]] +// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP27]], !dbg [[DBG100]] +// CHECK1-NEXT: [[TOBOOL23:%.*]] = icmp ne i32 [[OR]], 0, !dbg [[DBG100]] +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL23]] to i8, !dbg [[DBG100]] +// CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[TMP8]], align 1, !dbg [[DBG100]] +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG101:![0-9]+]] // CHECK1: omp.body.continue: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG74]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG63]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP28]], 1, !dbg [[DBG65]] -// CHECK1-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG65]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG74]], !llvm.loop [[LOOP104:![0-9]+]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP29]], 1, !dbg [[DBG72]] +// CHECK1-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG72]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG63]], !llvm.loop [[LOOP102:![0-9]+]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG74]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG63]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP29]], [[TMP30]], !dbg [[DBG65]] -// CHECK1-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_LB]], align 4, !dbg [[DBG65]] -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[TMP31]], [[TMP32]], !dbg [[DBG65]] -// CHECK1-NEXT: store i32 [[ADD30]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG65]] -// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG74]], !llvm.loop [[LOOP106:![0-9]+]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP30]], [[TMP31]], !dbg [[DBG72]] +// CHECK1-NEXT: store i32 [[ADD25]], i32* [[DOTOMP_LB]], align 4, !dbg [[DBG72]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP32]], [[TMP33]], !dbg [[DBG72]] +// CHECK1-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG72]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG63]], !llvm.loop [[LOOP104:![0-9]+]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5:[0-9]+]], i32 [[TMP11]]), !dbg [[DBG105:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG107:![0-9]+]] -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [10 x [10 x i32]]]* noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[B:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG108:![0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META115:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116:![0-9]+]] -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META117:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116]] -// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[C]], [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]]** [[C_ADDR]], metadata [[META118:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116]] -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i64* [[A_ADDR]], metadata [[META119:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116]] -// CHECK1-NEXT: store [10 x [10 x i32]]* [[B]], [10 x [10 x i32]]** [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]]** [[B_ADDR]], metadata [[META120:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116]] -// CHECK1-NEXT: store i8* [[BB]], i8** [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8** [[BB_ADDR]], metadata [[META121:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116]] -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG122:![0-9]+]] -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*, !dbg [[DBG122]] -// CHECK1-NEXT: [[TMP1:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG122]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG122]] -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG122]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG122]] -// CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG122]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 4, !dbg [[DBG122]] -// CHECK1-NEXT: [[TMP7:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG122]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG122]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP5]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG122]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast i8* [[TMP8]] to i8 addrspace(1)*, !dbg [[DBG122]] -// CHECK1-NEXT: call void @__omp_outlined___debug__(i32* [[TMP3]], i32* [[TMP4]], [10 x [10 x [10 x i32]]] addrspace(1)* [[TMP9]], i32 [[TMP6]], [10 x [10 x i32]]* [[TMP7]], i8 addrspace(1)* [[TMP10]]) #[[ATTR3:[0-9]+]], !dbg [[DBG122]] -// CHECK1-NEXT: ret void, !dbg [[DBG122]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB5:[0-9]+]], i32 [[TMP12]]), !dbg [[DBG103:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG105:![0-9]+]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13 -// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[B:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[BB:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR5:[0-9]+]] !dbg [[DBG123:![0-9]+]] { +// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[B:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[BB:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR7:[0-9]+]] !dbg [[DBG106:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 @@ -334,34 +285,34 @@ // CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[C]], [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]]** [[C_ADDR]], metadata [[META126:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]]** [[C_ADDR]], metadata [[META113:![0-9]+]], metadata !DIExpression()), !dbg [[DBG114:![0-9]+]] // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i64* [[A_ADDR]], metadata [[META128:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i64* [[A_ADDR]], metadata [[META115:![0-9]+]], metadata !DIExpression()), !dbg [[DBG114]] // CHECK1-NEXT: store [10 x [10 x i32]]* [[B]], [10 x [10 x i32]]** [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]]** [[B_ADDR]], metadata [[META129:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]]** [[B_ADDR]], metadata [[META116:![0-9]+]], metadata !DIExpression()), !dbg [[DBG114]] // CHECK1-NEXT: store i8* [[BB]], i8** [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8** [[BB_ADDR]], metadata [[META130:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8** [[BB_ADDR]], metadata [[META117:![0-9]+]], metadata !DIExpression()), !dbg [[DBG114]] // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i64* [[DOTCAPTURE_EXPR__ADDR]], metadata [[META131:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]] -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG132:![0-9]+]] -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*, !dbg [[DBG132]] -// CHECK1-NEXT: [[TMP1:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG132]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG132]] -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8*, !dbg [[DBG132]] -// CHECK1-NEXT: [[TMP3:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG132]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4, !dbg [[DBG132]] -// CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG132]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG132]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1, !dbg [[DBG132]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1, !dbg [[DBG132]] -// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP3]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG132]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast i8* [[TMP6]] to i8 addrspace(1)*, !dbg [[DBG132]] -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug__([10 x [10 x [10 x i32]]] addrspace(1)* [[TMP8]], i32 [[TMP4]], [10 x [10 x i32]]* [[TMP5]], i8 addrspace(1)* [[TMP9]], i1 [[TOBOOL]]) #[[ATTR3]], !dbg [[DBG132]] -// CHECK1-NEXT: ret void, !dbg [[DBG132]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i64* [[DOTCAPTURE_EXPR__ADDR]], metadata [[META118:![0-9]+]], metadata !DIExpression()), !dbg [[DBG114]] +// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG119:![0-9]+]] +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*, !dbg [[DBG119]] +// CHECK1-NEXT: [[TMP1:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG119]] +// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG119]] +// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8*, !dbg [[DBG119]] +// CHECK1-NEXT: [[TMP3:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG119]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4, !dbg [[DBG119]] +// CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG119]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG119]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1, !dbg [[DBG119]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1, !dbg [[DBG119]] +// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP3]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG119]] +// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast i8* [[TMP6]] to i8 addrspace(1)*, !dbg [[DBG119]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug__([10 x [10 x [10 x i32]]] addrspace(1)* [[TMP8]], i32 [[TMP4]], [10 x [10 x i32]]* [[TMP5]], i8 addrspace(1)* [[TMP9]], i1 [[TOBOOL]]) #[[ATTR3:[0-9]+]], !dbg [[DBG119]] +// CHECK1-NEXT: ret void, !dbg [[DBG119]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug__ -// CHECK1-SAME: ([10 x [10 x [10 x i32]]] addrspace(1)* noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], [10 x [10 x i32]] addrspace(1)* noalias noundef [[B:%.*]], i8 addrspace(1)* noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG133:![0-9]+]] { +// CHECK1-SAME: ([10 x [10 x [10 x i32]]] addrspace(1)* noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], [10 x [10 x i32]] addrspace(1)* noalias noundef [[B:%.*]], i8 addrspace(1)* noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG120:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]] addrspace(1)*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 @@ -370,70 +321,65 @@ // CHECK1-NEXT: [[TMP:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca [10 x [10 x i32]]*, align 8 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: store [10 x [10 x [10 x i32]]] addrspace(1)* [[C]], [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], metadata [[META138:![0-9]+]], metadata !DIExpression()), !dbg [[DBG139:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], metadata [[META125:![0-9]+]], metadata !DIExpression()), !dbg [[DBG126:![0-9]+]] // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META140:![0-9]+]], metadata !DIExpression()), !dbg [[DBG141:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META127:![0-9]+]], metadata !DIExpression()), !dbg [[DBG128:![0-9]+]] // CHECK1-NEXT: store [10 x [10 x i32]] addrspace(1)* [[B]], [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], metadata [[META142:![0-9]+]], metadata !DIExpression()), !dbg [[DBG143:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], metadata [[META129:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130:![0-9]+]] // CHECK1-NEXT: store i8 addrspace(1)* [[BB]], i8 addrspace(1)** [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8 addrspace(1)** [[BB_ADDR]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG145:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]] addrspace(1)*, [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8, !dbg [[DBG146:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* [[TMP0]] to [10 x [10 x [10 x i32]]]*, !dbg [[DBG146]] -// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[TMP1]], [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG146]] -// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG146]] -// CHECK1-NEXT: [[TMP3:%.*]] = load [10 x [10 x i32]] addrspace(1)*, [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], align 8, !dbg [[DBG146]] -// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast [10 x [10 x i32]] addrspace(1)* [[TMP3]] to [10 x [10 x i32]]*, !dbg [[DBG146]] -// CHECK1-NEXT: store [10 x [10 x i32]]* [[TMP4]], [10 x [10 x i32]]** [[_TMP1]], align 8, !dbg [[DBG146]] -// CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[_TMP1]], align 8, !dbg [[DBG146]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)** [[BB_ADDR]], align 8, !dbg [[DBG146]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast i8 addrspace(1)* [[TMP6]] to i8*, !dbg [[DBG146]] -// CHECK1-NEXT: store i8* [[TMP7]], i8** [[_TMP2]], align 8, !dbg [[DBG146]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[_TMP2]], align 8, !dbg [[DBG146]] -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB10:[0-9]+]], i8 2, i1 false, i1 false), !dbg [[DBG146]] -// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG146]] -// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG146]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8 addrspace(1)** [[BB_ADDR]], metadata [[META131:![0-9]+]], metadata !DIExpression()), !dbg [[DBG132:![0-9]+]] +// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]] addrspace(1)*, [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8, !dbg [[DBG133:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* [[TMP0]] to [10 x [10 x [10 x i32]]]*, !dbg [[DBG133]] +// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[TMP1]], [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG133]] +// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG133]] +// CHECK1-NEXT: [[TMP3:%.*]] = load [10 x [10 x i32]] addrspace(1)*, [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], align 8, !dbg [[DBG133]] +// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast [10 x [10 x i32]] addrspace(1)* [[TMP3]] to [10 x [10 x i32]]*, !dbg [[DBG133]] +// CHECK1-NEXT: store [10 x [10 x i32]]* [[TMP4]], [10 x [10 x i32]]** [[_TMP1]], align 8, !dbg [[DBG133]] +// CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[_TMP1]], align 8, !dbg [[DBG133]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)** [[BB_ADDR]], align 8, !dbg [[DBG133]] +// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast i8 addrspace(1)* [[TMP6]] to i8*, !dbg [[DBG133]] +// CHECK1-NEXT: store i8* [[TMP7]], i8** [[_TMP2]], align 8, !dbg [[DBG133]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[_TMP2]], align 8, !dbg [[DBG133]] +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB10:[0-9]+]], i8 2, i1 false, i1 false), !dbg [[DBG133]] +// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG133]] +// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG133]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB15:[0-9]+]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG147:![0-9]+]] -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*, !dbg [[DBG147]] -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[CONV]], align 4, !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[A_CASTED]], align 8, !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast [10 x [10 x [10 x i32]]]* [[TMP2]] to i8*, !dbg [[DBG147]] -// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8, !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP12]] to i8*, !dbg [[DBG147]] -// CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8, !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP18:%.*]] = bitcast [10 x [10 x i32]]* [[TMP5]] to i8*, !dbg [[DBG147]] -// CHECK1-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 8, !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG147]] -// CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP19]], align 8, !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP20:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**, !dbg [[DBG147]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB15]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, [10 x [10 x [10 x i32]]]*, i64, [10 x [10 x i32]]*, i8*)* @__omp_outlined__2 to i8*), i8* null, i8** [[TMP20]], i64 4), !dbg [[DBG147]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB17:[0-9]+]], i8 2, i1 false), !dbg [[DBG148:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG150:![0-9]+]] +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG134:![0-9]+]] +// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[TMP2]], [10 x [10 x [10 x i32]]]** [[TMP11]], align 8, !dbg [[DBG134]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG134]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG135:![0-9]+]] +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 8, !dbg [[DBG134]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG134]] +// CHECK1-NEXT: store [10 x [10 x i32]]* [[TMP5]], [10 x [10 x i32]]** [[TMP14]], align 8, !dbg [[DBG134]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG134]] +// CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP15]], align 8, !dbg [[DBG134]] +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG]] to i8*, !dbg [[DBG134]] +// CHECK1-NEXT: [[TMP17:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 32), !dbg [[DBG134]] +// CHECK1-NEXT: [[TMP18:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP16]], i8* [[TMP17]]), !dbg [[DBG134]] +// CHECK1-NEXT: [[TMP19:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], align 8, !dbg [[DBG134]] +// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP18]] to %struct.anon.0*, !dbg [[DBG134]] +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP19]], %struct.anon.0* [[TMP20]], align 8, !dbg [[DBG134]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB15]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__1 to i8*), i8* null, i8* [[TMP18]]), !dbg [[DBG134]] +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP17]], i64 32), !dbg [[DBG134]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB17:[0-9]+]], i8 2, i1 false), !dbg [[DBG137:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG138:![0-9]+]] // CHECK1: worker.exit: -// CHECK1-NEXT: ret void, !dbg [[DBG146]] +// CHECK1-NEXT: ret void, !dbg [[DBG133]] // // -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined___debug__1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [10 x [10 x i32]]] addrspace(1)* noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], [10 x [10 x i32]] addrspace(1)* noalias noundef [[B:%.*]], i8 addrspace(1)* noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG151:![0-9]+]] { +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] !dbg [[DBG139:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]] addrspace(1)*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x [10 x i32]] addrspace(1)*, align 8 -// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca i8 addrspace(1)*, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK1-NEXT: [[_TMP2:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 @@ -444,208 +390,162 @@ // CHECK1-NEXT: [[H:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META154:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META146:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147:![0-9]+]] // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META156:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]] -// CHECK1-NEXT: store [10 x [10 x [10 x i32]]] addrspace(1)* [[C]], [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], metadata [[META157:![0-9]+]], metadata !DIExpression()), !dbg [[DBG158:![0-9]+]] -// CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META159:![0-9]+]], metadata !DIExpression()), !dbg [[DBG160:![0-9]+]] -// CHECK1-NEXT: store [10 x [10 x i32]] addrspace(1)* [[B]], [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], metadata [[META161:![0-9]+]], metadata !DIExpression()), !dbg [[DBG162:![0-9]+]] -// CHECK1-NEXT: store i8 addrspace(1)* [[BB]], i8 addrspace(1)** [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8 addrspace(1)** [[BB_ADDR]], metadata [[META163:![0-9]+]], metadata !DIExpression()), !dbg [[DBG164:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]] addrspace(1)*, [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8, !dbg [[DBG165:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* [[TMP0]] to [10 x [10 x [10 x i32]]]*, !dbg [[DBG165]] -// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[TMP1]], [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG165]] -// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG165]] -// CHECK1-NEXT: [[TMP3:%.*]] = load [10 x [10 x i32]] addrspace(1)*, [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], align 8, !dbg [[DBG165]] -// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast [10 x [10 x i32]] addrspace(1)* [[TMP3]] to [10 x [10 x i32]]*, !dbg [[DBG165]] -// CHECK1-NEXT: store [10 x [10 x i32]]* [[TMP4]], [10 x [10 x i32]]** [[_TMP1]], align 8, !dbg [[DBG165]] -// CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[_TMP1]], align 8, !dbg [[DBG165]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)** [[BB_ADDR]], align 8, !dbg [[DBG165]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast i8 addrspace(1)* [[TMP6]] to i8*, !dbg [[DBG165]] -// CHECK1-NEXT: store i8* [[TMP7]], i8** [[_TMP2]], align 8, !dbg [[DBG165]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[_TMP2]], align 8, !dbg [[DBG165]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_IV]], metadata [[META166:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_LB]], metadata [[META167:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]] -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG168:![0-9]+]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_UB]], metadata [[META169:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]] -// CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_STRIDE]], metadata [[META170:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]] -// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_IS_LAST]], metadata [[META171:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]] -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[I]], metadata [[META172:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG165]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4, !dbg [[DBG165]] -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB12:[0-9]+]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG173:![0-9]+]] -// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG165]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META148:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147]] +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata %struct.anon.0** [[__CONTEXT_ADDR]], metadata [[META149:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147]] +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8, !dbg [[DBG150:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0, !dbg [[DBG150]] +// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[TMP1]], align 8, !dbg [[DBG150]] +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1, !dbg [[DBG150]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8, !dbg [[DBG150]] +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[A]], align 4, !dbg [[DBG150]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2, !dbg [[DBG150]] +// CHECK1-NEXT: [[TMP6:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[TMP5]], align 8, !dbg [[DBG150]] +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3, !dbg [[DBG150]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[TMP7]], align 8, !dbg [[DBG150]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_IV]], metadata [[META151:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_LB]], metadata [[META152:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147]] +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG153:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_UB]], metadata [[META154:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147]] +// CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_STRIDE]], metadata [[META155:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147]] +// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_IS_LAST]], metadata [[META156:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147]] +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[I]], metadata [[META157:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG150]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4, !dbg [[DBG150]] +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB12:[0-9]+]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG150]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG150]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9, !dbg [[DBG168]] -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG168]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9, !dbg [[DBG153]] +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG153]] // CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG168]] +// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG153]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG168]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG153]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ], !dbg [[DBG168]] -// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]], !dbg [[DBG165]] -// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG165]] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ], !dbg [[DBG153]] +// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]], !dbg [[DBG158:![0-9]+]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG150]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG165]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG150]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]], !dbg [[DBG165]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG165]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]], !dbg [[DBG158]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG150]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1, !dbg [[DBG174:![0-9]+]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG174]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !dbg [[DBG174]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[F]], metadata [[META175:![0-9]+]], metadata !DIExpression()), !dbg [[DBG177:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 1, !dbg [[DBG178:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG178]] -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX6]], i64 0, i64 1, !dbg [[DBG178]] -// CHECK1-NEXT: store i32* [[ARRAYIDX7]], i32** [[F]], align 8, !dbg [[DBG177]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[G]], metadata [[META179:![0-9]+]], metadata !DIExpression()), !dbg [[DBG180:![0-9]+]] -// CHECK1-NEXT: store i32* [[A_ADDR]], i32** [[G]], align 8, !dbg [[DBG180]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[H]], metadata [[META181:![0-9]+]], metadata !DIExpression()), !dbg [[DBG182:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP5]], i64 0, i64 1, !dbg [[DBG183:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX8]], i64 0, i64 1, !dbg [[DBG183]] -// CHECK1-NEXT: store i32* [[ARRAYIDX9]], i32** [[H]], align 8, !dbg [[DBG182]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[D]], metadata [[META184:![0-9]+]], metadata !DIExpression()), !dbg [[DBG185:![0-9]+]] -// CHECK1-NEXT: store i32 15, i32* [[D]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: store i32 5, i32* [[A_ADDR]], align 4, !dbg [[DBG186:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP5]], i64 0, i64 0, !dbg [[DBG187:![0-9]+]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG188:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG187]] -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX10]], i64 0, i64 [[IDXPROM]], !dbg [[DBG187]] -// CHECK1-NEXT: store i32 10, i32* [[ARRAYIDX11]], align 4, !dbg [[DBG189:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 0, !dbg [[DBG190:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX12]], i64 0, i64 0, !dbg [[DBG190]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG191:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG190]] -// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX13]], i64 0, i64 [[IDXPROM14]], !dbg [[DBG190]] -// CHECK1-NEXT: store i32 11, i32* [[ARRAYIDX15]], align 4, !dbg [[DBG192:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 0, !dbg [[DBG193:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX16]], i64 0, i64 0, !dbg [[DBG193]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG194:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM18:%.*]] = sext i32 [[TMP21]] to i64, !dbg [[DBG193]] -// CHECK1-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX17]], i64 0, i64 [[IDXPROM18]], !dbg [[DBG193]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX19]], align 4, !dbg [[DBG193]] -// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP5]], i64 0, i64 0, !dbg [[DBG195:![0-9]+]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG196:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM21:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG195]] -// CHECK1-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX20]], i64 0, i64 [[IDXPROM21]], !dbg [[DBG195]] -// CHECK1-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX22]], align 4, !dbg [[DBG197:![0-9]+]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i8, i8* [[TMP8]], align 1, !dbg [[DBG198:![0-9]+]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP24]] to i1, !dbg [[DBG198]] -// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG198]] -// CHECK1-NEXT: store i32 [[CONV]], i32* [[D]], align 4, !dbg [[DBG199:![0-9]+]] -// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG200:![0-9]+]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1, !dbg [[DBG159:![0-9]+]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG159]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !dbg [[DBG159]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[F]], metadata [[META160:![0-9]+]], metadata !DIExpression()), !dbg [[DBG162:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 1, !dbg [[DBG163:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG163]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX3]], i64 0, i64 1, !dbg [[DBG163]] +// CHECK1-NEXT: store i32* [[ARRAYIDX4]], i32** [[F]], align 8, !dbg [[DBG162]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[G]], metadata [[META164:![0-9]+]], metadata !DIExpression()), !dbg [[DBG165:![0-9]+]] +// CHECK1-NEXT: store i32* [[A]], i32** [[G]], align 8, !dbg [[DBG165]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[H]], metadata [[META166:![0-9]+]], metadata !DIExpression()), !dbg [[DBG167:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP6]], i64 0, i64 1, !dbg [[DBG168:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX5]], i64 0, i64 1, !dbg [[DBG168]] +// CHECK1-NEXT: store i32* [[ARRAYIDX6]], i32** [[H]], align 8, !dbg [[DBG167]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[D]], metadata [[META169:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170:![0-9]+]] +// CHECK1-NEXT: store i32 15, i32* [[D]], align 4, !dbg [[DBG170]] +// CHECK1-NEXT: store i32 5, i32* [[A]], align 4, !dbg [[DBG171:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP6]], i64 0, i64 0, !dbg [[DBG172:![0-9]+]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG173:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG172]] +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX7]], i64 0, i64 [[IDXPROM]], !dbg [[DBG172]] +// CHECK1-NEXT: store i32 10, i32* [[ARRAYIDX8]], align 4, !dbg [[DBG174:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 0, !dbg [[DBG175:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX9]], i64 0, i64 0, !dbg [[DBG175]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG176:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG175]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX10]], i64 0, i64 [[IDXPROM11]], !dbg [[DBG175]] +// CHECK1-NEXT: store i32 11, i32* [[ARRAYIDX12]], align 4, !dbg [[DBG177:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 0, !dbg [[DBG178:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG178]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG179:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP21]] to i64, !dbg [[DBG178]] +// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG178]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX16]], align 4, !dbg [[DBG178]] +// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP6]], i64 0, i64 0, !dbg [[DBG180:![0-9]+]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG181:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM18:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG180]] +// CHECK1-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX17]], i64 0, i64 [[IDXPROM18]], !dbg [[DBG180]] +// CHECK1-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX19]], align 4, !dbg [[DBG182:![0-9]+]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i8, i8* [[TMP8]], align 1, !dbg [[DBG183:![0-9]+]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP24]] to i1, !dbg [[DBG183]] +// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG183]] +// CHECK1-NEXT: store i32 [[CONV]], i32* [[D]], align 4, !dbg [[DBG184:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG185:![0-9]+]] // CHECK1: omp.body.continue: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG173]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG150]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP25]], 1, !dbg [[DBG165]] -// CHECK1-NEXT: store i32 [[ADD23]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG165]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG173]], !llvm.loop [[LOOP201:![0-9]+]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP25]], 1, !dbg [[DBG158]] +// CHECK1-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG158]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG150]], !llvm.loop [[LOOP186:![0-9]+]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG173]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG150]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP26]], [[TMP27]], !dbg [[DBG165]] -// CHECK1-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_LB]], align 4, !dbg [[DBG165]] -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]], !dbg [[DBG165]] -// CHECK1-NEXT: store i32 [[ADD25]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG165]] -// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG173]], !llvm.loop [[LOOP203:![0-9]+]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP26]], [[TMP27]], !dbg [[DBG158]] +// CHECK1-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_LB]], align 4, !dbg [[DBG158]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP28]], [[TMP29]], !dbg [[DBG158]] +// CHECK1-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG158]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG150]], !llvm.loop [[LOOP188:![0-9]+]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB14:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG202:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG204:![0-9]+]] -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [10 x [10 x i32]]]* noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[B:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG205:![0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META206:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207:![0-9]+]] -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META208:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]] -// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[C]], [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]]** [[C_ADDR]], metadata [[META209:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]] -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i64* [[A_ADDR]], metadata [[META210:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]] -// CHECK1-NEXT: store [10 x [10 x i32]]* [[B]], [10 x [10 x i32]]** [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]]** [[B_ADDR]], metadata [[META211:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]] -// CHECK1-NEXT: store i8* [[BB]], i8** [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8** [[BB_ADDR]], metadata [[META212:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]] -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG213:![0-9]+]] -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP1:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 4, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP7:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP5]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast [10 x [10 x i32]]* [[TMP7]] to [10 x [10 x i32]] addrspace(1)*, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast i8* [[TMP8]] to i8 addrspace(1)*, !dbg [[DBG213]] -// CHECK1-NEXT: call void @__omp_outlined___debug__1(i32* [[TMP3]], i32* [[TMP4]], [10 x [10 x [10 x i32]]] addrspace(1)* [[TMP9]], i32 [[TMP6]], [10 x [10 x i32]] addrspace(1)* [[TMP10]], i8 addrspace(1)* [[TMP11]]) #[[ATTR3]], !dbg [[DBG213]] -// CHECK1-NEXT: ret void, !dbg [[DBG213]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB14:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG187:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG189:![0-9]+]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27 -// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[B:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5]] !dbg [[DBG214:![0-9]+]] { +// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[B:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR7]] !dbg [[DBG190:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 // CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[C]], [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]]** [[C_ADDR]], metadata [[META217:![0-9]+]], metadata !DIExpression()), !dbg [[DBG218:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]]** [[C_ADDR]], metadata [[META193:![0-9]+]], metadata !DIExpression()), !dbg [[DBG194:![0-9]+]] // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i64* [[A_ADDR]], metadata [[META219:![0-9]+]], metadata !DIExpression()), !dbg [[DBG218]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i64* [[A_ADDR]], metadata [[META195:![0-9]+]], metadata !DIExpression()), !dbg [[DBG194]] // CHECK1-NEXT: store [10 x [10 x i32]]* [[B]], [10 x [10 x i32]]** [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]]** [[B_ADDR]], metadata [[META220:![0-9]+]], metadata !DIExpression()), !dbg [[DBG218]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]]** [[B_ADDR]], metadata [[META196:![0-9]+]], metadata !DIExpression()), !dbg [[DBG194]] // CHECK1-NEXT: store i8* [[BB]], i8** [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8** [[BB_ADDR]], metadata [[META221:![0-9]+]], metadata !DIExpression()), !dbg [[DBG218]] -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG222:![0-9]+]] -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*, !dbg [[DBG222]] -// CHECK1-NEXT: [[TMP1:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG222]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG222]] -// CHECK1-NEXT: [[TMP3:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG222]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4, !dbg [[DBG222]] -// CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG222]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG222]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP3]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG222]] -// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast [10 x [10 x i32]]* [[TMP5]] to [10 x [10 x i32]] addrspace(1)*, !dbg [[DBG222]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast i8* [[TMP6]] to i8 addrspace(1)*, !dbg [[DBG222]] -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug__([10 x [10 x [10 x i32]]] addrspace(1)* [[TMP7]], i32 [[TMP4]], [10 x [10 x i32]] addrspace(1)* [[TMP8]], i8 addrspace(1)* [[TMP9]]) #[[ATTR3]], !dbg [[DBG222]] -// CHECK1-NEXT: ret void, !dbg [[DBG222]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8** [[BB_ADDR]], metadata [[META197:![0-9]+]], metadata !DIExpression()), !dbg [[DBG194]] +// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG198:![0-9]+]] +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*, !dbg [[DBG198]] +// CHECK1-NEXT: [[TMP1:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG198]] +// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG198]] +// CHECK1-NEXT: [[TMP3:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG198]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4, !dbg [[DBG198]] +// CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG198]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG198]] +// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP3]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG198]] +// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast [10 x [10 x i32]]* [[TMP5]] to [10 x [10 x i32]] addrspace(1)*, !dbg [[DBG198]] +// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast i8* [[TMP6]] to i8 addrspace(1)*, !dbg [[DBG198]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug__([10 x [10 x [10 x i32]]] addrspace(1)* [[TMP7]], i32 [[TMP4]], [10 x [10 x i32]] addrspace(1)* [[TMP8]], i8 addrspace(1)* [[TMP9]]) #[[ATTR3]], !dbg [[DBG198]] +// CHECK1-NEXT: ret void, !dbg [[DBG198]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug__ -// CHECK1-SAME: ([10 x [10 x [10 x i32]]] addrspace(1)* noalias noundef [[C:%.*]], i32 addrspace(1)* noalias noundef [[A:%.*]], [10 x [10 x i32]] addrspace(1)* noalias noundef [[B:%.*]], i8 addrspace(1)* noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG223:![0-9]+]] { +// CHECK1-SAME: ([10 x [10 x [10 x i32]]] addrspace(1)* noalias noundef [[C:%.*]], i32 addrspace(1)* noalias noundef [[A:%.*]], [10 x [10 x i32]] addrspace(1)* noalias noundef [[B:%.*]], i8 addrspace(1)* noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG199:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]] addrspace(1)*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32 addrspace(1)*, align 8 @@ -655,70 +555,67 @@ // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca [10 x [10 x i32]]*, align 8 // CHECK1-NEXT: [[_TMP3:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_1]], align 8 // CHECK1-NEXT: store [10 x [10 x [10 x i32]]] addrspace(1)* [[C]], [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], metadata [[META228:![0-9]+]], metadata !DIExpression()), !dbg [[DBG229:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], metadata [[META204:![0-9]+]], metadata !DIExpression()), !dbg [[DBG205:![0-9]+]] // CHECK1-NEXT: store i32 addrspace(1)* [[A]], i32 addrspace(1)** [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32 addrspace(1)** [[A_ADDR]], metadata [[META230:![0-9]+]], metadata !DIExpression()), !dbg [[DBG231:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32 addrspace(1)** [[A_ADDR]], metadata [[META206:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207:![0-9]+]] // CHECK1-NEXT: store [10 x [10 x i32]] addrspace(1)* [[B]], [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], metadata [[META232:![0-9]+]], metadata !DIExpression()), !dbg [[DBG233:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], metadata [[META208:![0-9]+]], metadata !DIExpression()), !dbg [[DBG209:![0-9]+]] // CHECK1-NEXT: store i8 addrspace(1)* [[BB]], i8 addrspace(1)** [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8 addrspace(1)** [[BB_ADDR]], metadata [[META234:![0-9]+]], metadata !DIExpression()), !dbg [[DBG235:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]] addrspace(1)*, [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8, !dbg [[DBG236:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* [[TMP0]] to [10 x [10 x [10 x i32]]]*, !dbg [[DBG236]] -// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[TMP1]], [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP3:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)** [[A_ADDR]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast i32 addrspace(1)* [[TMP3]] to i32*, !dbg [[DBG236]] -// CHECK1-NEXT: store i32* [[TMP4]], i32** [[_TMP1]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[_TMP1]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP6:%.*]] = load [10 x [10 x i32]] addrspace(1)*, [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast [10 x [10 x i32]] addrspace(1)* [[TMP6]] to [10 x [10 x i32]]*, !dbg [[DBG236]] -// CHECK1-NEXT: store [10 x [10 x i32]]* [[TMP7]], [10 x [10 x i32]]** [[_TMP2]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP8:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[_TMP2]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)** [[BB_ADDR]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast i8 addrspace(1)* [[TMP9]] to i8*, !dbg [[DBG236]] -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[_TMP3]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i8*, i8** [[_TMP3]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB19:[0-9]+]], i8 2, i1 false, i1 false), !dbg [[DBG236]] -// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG236]] -// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG236]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8 addrspace(1)** [[BB_ADDR]], metadata [[META210:![0-9]+]], metadata !DIExpression()), !dbg [[DBG211:![0-9]+]] +// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]] addrspace(1)*, [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8, !dbg [[DBG212:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* [[TMP0]] to [10 x [10 x [10 x i32]]]*, !dbg [[DBG212]] +// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[TMP1]], [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP3:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)** [[A_ADDR]], align 8, !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast i32 addrspace(1)* [[TMP3]] to i32*, !dbg [[DBG212]] +// CHECK1-NEXT: store i32* [[TMP4]], i32** [[_TMP1]], align 8, !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[_TMP1]], align 8, !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP6:%.*]] = load [10 x [10 x i32]] addrspace(1)*, [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], align 8, !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast [10 x [10 x i32]] addrspace(1)* [[TMP6]] to [10 x [10 x i32]]*, !dbg [[DBG212]] +// CHECK1-NEXT: store [10 x [10 x i32]]* [[TMP7]], [10 x [10 x i32]]** [[_TMP2]], align 8, !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP8:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[_TMP2]], align 8, !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)** [[BB_ADDR]], align 8, !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast i8 addrspace(1)* [[TMP9]] to i8*, !dbg [[DBG212]] +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[_TMP3]], align 8, !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i8*, i8** [[_TMP3]], align 8, !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB19:[0-9]+]], i8 2, i1 false, i1 false), !dbg [[DBG212]] +// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG212]] +// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG212]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB24:[0-9]+]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG237:![0-9]+]] -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast [10 x [10 x [10 x i32]]]* [[TMP2]] to i8*, !dbg [[DBG237]] -// CHECK1-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 8, !dbg [[DBG237]] -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG237]] -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP5]] to i8*, !dbg [[DBG237]] -// CHECK1-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8, !dbg [[DBG237]] -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG237]] -// CHECK1-NEXT: [[TMP19:%.*]] = bitcast [10 x [10 x i32]]* [[TMP8]] to i8*, !dbg [[DBG237]] -// CHECK1-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 8, !dbg [[DBG237]] -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG237]] -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[TMP20]], align 8, !dbg [[DBG237]] -// CHECK1-NEXT: [[TMP21:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**, !dbg [[DBG237]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB24]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, [10 x [10 x [10 x i32]]]*, i32*, [10 x [10 x i32]]*, i8*)* @__omp_outlined__4 to i8*), i8* null, i8** [[TMP21]], i64 4), !dbg [[DBG237]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB26:[0-9]+]], i8 2, i1 false), !dbg [[DBG238:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG240:![0-9]+]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG213:![0-9]+]] +// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[TMP2]], [10 x [10 x [10 x i32]]]** [[TMP14]], align 8, !dbg [[DBG213]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG213]] +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[TMP15]], align 8, !dbg [[DBG213]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG213]] +// CHECK1-NEXT: store [10 x [10 x i32]]* [[TMP8]], [10 x [10 x i32]]** [[TMP16]], align 8, !dbg [[DBG213]] +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG213]] +// CHECK1-NEXT: store i8* [[TMP11]], i8** [[TMP17]], align 8, !dbg [[DBG213]] +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast %struct.anon.1* [[DOTTMP_OUTLINED_AGG_ARG]] to i8*, !dbg [[DBG213]] +// CHECK1-NEXT: [[TMP19:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 32), !dbg [[DBG213]] +// CHECK1-NEXT: [[TMP20:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP18]], i8* [[TMP19]]), !dbg [[DBG213]] +// CHECK1-NEXT: [[TMP21:%.*]] = load [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], align 8, !dbg [[DBG213]] +// CHECK1-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP20]] to %struct.anon.1*, !dbg [[DBG213]] +// CHECK1-NEXT: store [[STRUCT_ANON_1]] [[TMP21]], %struct.anon.1* [[TMP22]], align 8, !dbg [[DBG213]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB24]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.1*)* @__omp_outlined__2 to i8*), i8* null, i8* [[TMP20]]), !dbg [[DBG213]] +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[TMP19]], i64 32), !dbg [[DBG213]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB26:[0-9]+]], i8 2, i1 false), !dbg [[DBG214:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG216:![0-9]+]] // CHECK1: worker.exit: -// CHECK1-NEXT: ret void, !dbg [[DBG236]] +// CHECK1-NEXT: ret void, !dbg [[DBG212]] // // -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined___debug__3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [10 x [10 x i32]]] addrspace(1)* noalias noundef [[C:%.*]], i32 addrspace(1)* noalias noundef [[A:%.*]], [10 x [10 x i32]] addrspace(1)* noalias noundef [[B:%.*]], i8 addrspace(1)* noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG241:![0-9]+]] { +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] !dbg [[DBG217:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]] addrspace(1)*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32 addrspace(1)*, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x [10 x i32]] addrspace(1)*, align 8 -// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca i8 addrspace(1)*, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[_TMP2:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK1-NEXT: [[_TMP3:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 @@ -729,212 +626,160 @@ // CHECK1-NEXT: [[H:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META244:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META224:![0-9]+]], metadata !DIExpression()), !dbg [[DBG225:![0-9]+]] // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META246:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]] -// CHECK1-NEXT: store [10 x [10 x [10 x i32]]] addrspace(1)* [[C]], [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], metadata [[META247:![0-9]+]], metadata !DIExpression()), !dbg [[DBG248:![0-9]+]] -// CHECK1-NEXT: store i32 addrspace(1)* [[A]], i32 addrspace(1)** [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32 addrspace(1)** [[A_ADDR]], metadata [[META249:![0-9]+]], metadata !DIExpression()), !dbg [[DBG250:![0-9]+]] -// CHECK1-NEXT: store [10 x [10 x i32]] addrspace(1)* [[B]], [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], metadata [[META251:![0-9]+]], metadata !DIExpression()), !dbg [[DBG252:![0-9]+]] -// CHECK1-NEXT: store i8 addrspace(1)* [[BB]], i8 addrspace(1)** [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8 addrspace(1)** [[BB_ADDR]], metadata [[META253:![0-9]+]], metadata !DIExpression()), !dbg [[DBG254:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]] addrspace(1)*, [10 x [10 x [10 x i32]]] addrspace(1)** [[C_ADDR]], align 8, !dbg [[DBG255:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* [[TMP0]] to [10 x [10 x [10 x i32]]]*, !dbg [[DBG255]] -// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[TMP1]], [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[TMP]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP3:%.*]] = load i32 addrspace(1)*, i32 addrspace(1)** [[A_ADDR]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast i32 addrspace(1)* [[TMP3]] to i32*, !dbg [[DBG255]] -// CHECK1-NEXT: store i32* [[TMP4]], i32** [[_TMP1]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[_TMP1]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP6:%.*]] = load [10 x [10 x i32]] addrspace(1)*, [10 x [10 x i32]] addrspace(1)** [[B_ADDR]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast [10 x [10 x i32]] addrspace(1)* [[TMP6]] to [10 x [10 x i32]]*, !dbg [[DBG255]] -// CHECK1-NEXT: store [10 x [10 x i32]]* [[TMP7]], [10 x [10 x i32]]** [[_TMP2]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP8:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[_TMP2]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)** [[BB_ADDR]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast i8 addrspace(1)* [[TMP9]] to i8*, !dbg [[DBG255]] -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[_TMP3]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i8*, i8** [[_TMP3]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_IV]], metadata [[META256:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_LB]], metadata [[META257:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]] -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG258:![0-9]+]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_UB]], metadata [[META259:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]] -// CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_STRIDE]], metadata [[META260:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]] -// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_IS_LAST]], metadata [[META261:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]] -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[I]], metadata [[META262:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4, !dbg [[DBG255]] -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB21:[0-9]+]], i32 [[TMP13]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG263:![0-9]+]] -// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG255]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META226:![0-9]+]], metadata !DIExpression()), !dbg [[DBG225]] +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata %struct.anon.1** [[__CONTEXT_ADDR]], metadata [[META227:![0-9]+]], metadata !DIExpression()), !dbg [[DBG225]] +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8, !dbg [[DBG228:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0, !dbg [[DBG228]] +// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[TMP1]], align 8, !dbg [[DBG228]] +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1, !dbg [[DBG228]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !dbg [[DBG228]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2, !dbg [[DBG228]] +// CHECK1-NEXT: [[TMP6:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[TMP5]], align 8, !dbg [[DBG228]] +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3, !dbg [[DBG228]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[TMP7]], align 8, !dbg [[DBG228]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_IV]], metadata [[META229:![0-9]+]], metadata !DIExpression()), !dbg [[DBG225]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_LB]], metadata [[META230:![0-9]+]], metadata !DIExpression()), !dbg [[DBG225]] +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG231:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_UB]], metadata [[META232:![0-9]+]], metadata !DIExpression()), !dbg [[DBG225]] +// CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_STRIDE]], metadata [[META233:![0-9]+]], metadata !DIExpression()), !dbg [[DBG225]] +// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[DOTOMP_IS_LAST]], metadata [[META234:![0-9]+]], metadata !DIExpression()), !dbg [[DBG225]] +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[I]], metadata [[META235:![0-9]+]], metadata !DIExpression()), !dbg [[DBG225]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG228]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4, !dbg [[DBG228]] +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB21:[0-9]+]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG228]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG228]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 9, !dbg [[DBG258]] -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG258]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9, !dbg [[DBG231]] +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG231]] // CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG258]] +// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG231]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG258]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG231]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ], !dbg [[DBG258]] -// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]], !dbg [[DBG255]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG255]] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ], !dbg [[DBG231]] +// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]], !dbg [[DBG236:![0-9]+]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG228]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG255]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG228]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]], !dbg [[DBG255]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]], !dbg [[DBG236]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG228]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1, !dbg [[DBG264:![0-9]+]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG264]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !dbg [[DBG264]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[F]], metadata [[META265:![0-9]+]], metadata !DIExpression()), !dbg [[DBG267:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 1, !dbg [[DBG268:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG268]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX7]], i64 0, i64 1, !dbg [[DBG268]] -// CHECK1-NEXT: store i32* [[ARRAYIDX8]], i32** [[F]], align 8, !dbg [[DBG267]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[G]], metadata [[META269:![0-9]+]], metadata !DIExpression()), !dbg [[DBG270:![0-9]+]] -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[G]], align 8, !dbg [[DBG270]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[H]], metadata [[META271:![0-9]+]], metadata !DIExpression()), !dbg [[DBG272:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP8]], i64 0, i64 1, !dbg [[DBG273:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX9]], i64 0, i64 1, !dbg [[DBG273]] -// CHECK1-NEXT: store i32* [[ARRAYIDX10]], i32** [[H]], align 8, !dbg [[DBG272]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[D]], metadata [[META274:![0-9]+]], metadata !DIExpression()), !dbg [[DBG275:![0-9]+]] -// CHECK1-NEXT: store i32 15, i32* [[D]], align 4, !dbg [[DBG275]] -// CHECK1-NEXT: store i32 5, i32* [[TMP5]], align 4, !dbg [[DBG276:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP8]], i64 0, i64 0, !dbg [[DBG277:![0-9]+]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP5]], align 4, !dbg [[DBG278:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG277]] -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX11]], i64 0, i64 [[IDXPROM]], !dbg [[DBG277]] -// CHECK1-NEXT: store i32 10, i32* [[ARRAYIDX12]], align 4, !dbg [[DBG279:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 0, !dbg [[DBG280:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG280]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP5]], align 4, !dbg [[DBG281:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG280]] -// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG280]] -// CHECK1-NEXT: store i32 11, i32* [[ARRAYIDX16]], align 4, !dbg [[DBG282:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 0, !dbg [[DBG283:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX17]], i64 0, i64 0, !dbg [[DBG283]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP5]], align 4, !dbg [[DBG284:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP24]] to i64, !dbg [[DBG283]] -// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG283]] -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX20]], align 4, !dbg [[DBG283]] -// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP8]], i64 0, i64 0, !dbg [[DBG285:![0-9]+]] -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP5]], align 4, !dbg [[DBG286:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP26]] to i64, !dbg [[DBG285]] -// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG285]] -// CHECK1-NEXT: store i32 [[TMP25]], i32* [[ARRAYIDX23]], align 4, !dbg [[DBG287:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP8]], i64 0, i64 0, !dbg [[DBG288:![0-9]+]] -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP5]], align 4, !dbg [[DBG289:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM25:%.*]] = sext i32 [[TMP27]] to i64, !dbg [[DBG288]] -// CHECK1-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX24]], i64 0, i64 [[IDXPROM25]], !dbg [[DBG288]] -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX26]], align 4, !dbg [[DBG288]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0, !dbg [[DBG288]] -// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8, !dbg [[DBG290:![0-9]+]] -// CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[TMP11]], align 1, !dbg [[DBG290]] -// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG291:![0-9]+]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1, !dbg [[DBG237:![0-9]+]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG237]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !dbg [[DBG237]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[F]], metadata [[META238:![0-9]+]], metadata !DIExpression()), !dbg [[DBG240:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 1, !dbg [[DBG241:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG241]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX3]], i64 0, i64 1, !dbg [[DBG241]] +// CHECK1-NEXT: store i32* [[ARRAYIDX4]], i32** [[F]], align 8, !dbg [[DBG240]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[G]], metadata [[META242:![0-9]+]], metadata !DIExpression()), !dbg [[DBG243:![0-9]+]] +// CHECK1-NEXT: store i32* [[TMP4]], i32** [[G]], align 8, !dbg [[DBG243]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[H]], metadata [[META244:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP6]], i64 0, i64 1, !dbg [[DBG246:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX5]], i64 0, i64 1, !dbg [[DBG246]] +// CHECK1-NEXT: store i32* [[ARRAYIDX6]], i32** [[H]], align 8, !dbg [[DBG245]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32* [[D]], metadata [[META247:![0-9]+]], metadata !DIExpression()), !dbg [[DBG248:![0-9]+]] +// CHECK1-NEXT: store i32 15, i32* [[D]], align 4, !dbg [[DBG248]] +// CHECK1-NEXT: store i32 5, i32* [[TMP4]], align 4, !dbg [[DBG249:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP6]], i64 0, i64 0, !dbg [[DBG250:![0-9]+]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP4]], align 4, !dbg [[DBG251:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG250]] +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX7]], i64 0, i64 [[IDXPROM]], !dbg [[DBG250]] +// CHECK1-NEXT: store i32 10, i32* [[ARRAYIDX8]], align 4, !dbg [[DBG252:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 0, !dbg [[DBG253:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX9]], i64 0, i64 0, !dbg [[DBG253]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP4]], align 4, !dbg [[DBG254:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG253]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX10]], i64 0, i64 [[IDXPROM11]], !dbg [[DBG253]] +// CHECK1-NEXT: store i32 11, i32* [[ARRAYIDX12]], align 4, !dbg [[DBG255:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* [[TMP2]], i64 0, i64 0, !dbg [[DBG256:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG256]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP4]], align 4, !dbg [[DBG257:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP21]] to i64, !dbg [[DBG256]] +// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG256]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX16]], align 4, !dbg [[DBG256]] +// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP6]], i64 0, i64 0, !dbg [[DBG258:![0-9]+]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP4]], align 4, !dbg [[DBG259:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM18:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG258]] +// CHECK1-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX17]], i64 0, i64 [[IDXPROM18]], !dbg [[DBG258]] +// CHECK1-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX19]], align 4, !dbg [[DBG260:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP6]], i64 0, i64 0, !dbg [[DBG261:![0-9]+]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP4]], align 4, !dbg [[DBG262:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM21:%.*]] = sext i32 [[TMP24]] to i64, !dbg [[DBG261]] +// CHECK1-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX20]], i64 0, i64 [[IDXPROM21]], !dbg [[DBG261]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX22]], align 4, !dbg [[DBG261]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0, !dbg [[DBG261]] +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8, !dbg [[DBG263:![0-9]+]] +// CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[TMP8]], align 1, !dbg [[DBG263]] +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG264:![0-9]+]] // CHECK1: omp.body.continue: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG263]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG228]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[TMP29]], 1, !dbg [[DBG255]] -// CHECK1-NEXT: store i32 [[ADD27]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG255]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG263]], !llvm.loop [[LOOP292:![0-9]+]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP26]], 1, !dbg [[DBG236]] +// CHECK1-NEXT: store i32 [[ADD23]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG236]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG228]], !llvm.loop [[LOOP265:![0-9]+]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG263]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG228]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP30]], [[TMP31]], !dbg [[DBG255]] -// CHECK1-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_LB]], align 4, !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP32]], [[TMP33]], !dbg [[DBG255]] -// CHECK1-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG255]] -// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG263]], !llvm.loop [[LOOP294:![0-9]+]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], [[TMP28]], !dbg [[DBG236]] +// CHECK1-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_LB]], align 4, !dbg [[DBG236]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP29]], [[TMP30]], !dbg [[DBG236]] +// CHECK1-NEXT: store i32 [[ADD25]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG236]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG228]], !llvm.loop [[LOOP267:![0-9]+]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB23:[0-9]+]], i32 [[TMP13]]), !dbg [[DBG293:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG295:![0-9]+]] -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [10 x [10 x i32]]]* noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[B:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG296:![0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META299:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300:![0-9]+]] -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META301:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300]] -// CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[C]], [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]]** [[C_ADDR]], metadata [[META302:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300]] -// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META303:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300]] -// CHECK1-NEXT: store [10 x [10 x i32]]* [[B]], [10 x [10 x i32]]** [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]]** [[B_ADDR]], metadata [[META304:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300]] -// CHECK1-NEXT: store i8* [[BB]], i8** [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8** [[BB_ADDR]], metadata [[META305:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300]] -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG306:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP6:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP8:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP6]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast i32* [[TMP7]] to i32 addrspace(1)*, !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP12:%.*]] = addrspacecast [10 x [10 x i32]]* [[TMP8]] to [10 x [10 x i32]] addrspace(1)*, !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP13:%.*]] = addrspacecast i8* [[TMP9]] to i8 addrspace(1)*, !dbg [[DBG306]] -// CHECK1-NEXT: call void @__omp_outlined___debug__3(i32* [[TMP4]], i32* [[TMP5]], [10 x [10 x [10 x i32]]] addrspace(1)* [[TMP10]], i32 addrspace(1)* [[TMP11]], [10 x [10 x i32]] addrspace(1)* [[TMP12]], i8 addrspace(1)* [[TMP13]]) #[[ATTR3]], !dbg [[DBG306]] -// CHECK1-NEXT: ret void, !dbg [[DBG306]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB23:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG266:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG268:![0-9]+]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41 -// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[B:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5]] !dbg [[DBG307:![0-9]+]] { +// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x [10 x i32]]* noundef nonnull align 4 dereferenceable(400) [[B:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR7]] !dbg [[DBG269:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 // CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store [10 x [10 x [10 x i32]]]* [[C]], [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]]** [[C_ADDR]], metadata [[META310:![0-9]+]], metadata !DIExpression()), !dbg [[DBG311:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x [10 x i32]]]** [[C_ADDR]], metadata [[META272:![0-9]+]], metadata !DIExpression()), !dbg [[DBG273:![0-9]+]] // CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META312:![0-9]+]], metadata !DIExpression()), !dbg [[DBG311]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META274:![0-9]+]], metadata !DIExpression()), !dbg [[DBG273]] // CHECK1-NEXT: store [10 x [10 x i32]]* [[B]], [10 x [10 x i32]]** [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]]** [[B_ADDR]], metadata [[META313:![0-9]+]], metadata !DIExpression()), !dbg [[DBG311]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata [10 x [10 x i32]]** [[B_ADDR]], metadata [[META275:![0-9]+]], metadata !DIExpression()), !dbg [[DBG273]] // CHECK1-NEXT: store i8* [[BB]], i8** [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8** [[BB_ADDR]], metadata [[META314:![0-9]+]], metadata !DIExpression()), !dbg [[DBG311]] -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG315:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG315]] -// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG315]] -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG315]] -// CHECK1-NEXT: [[TMP4:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG315]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG315]] -// CHECK1-NEXT: [[TMP6:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG315]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG315]] -// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP4]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG315]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast i32* [[TMP5]] to i32 addrspace(1)*, !dbg [[DBG315]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast [10 x [10 x i32]]* [[TMP6]] to [10 x [10 x i32]] addrspace(1)*, !dbg [[DBG315]] -// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast i8* [[TMP7]] to i8 addrspace(1)*, !dbg [[DBG315]] -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug__([10 x [10 x [10 x i32]]] addrspace(1)* [[TMP8]], i32 addrspace(1)* [[TMP9]], [10 x [10 x i32]] addrspace(1)* [[TMP10]], i8 addrspace(1)* [[TMP11]]) #[[ATTR3]], !dbg [[DBG315]] -// CHECK1-NEXT: ret void, !dbg [[DBG315]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i8** [[BB_ADDR]], metadata [[META276:![0-9]+]], metadata !DIExpression()), !dbg [[DBG273]] +// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG277:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG277]] +// CHECK1-NEXT: [[TMP2:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG277]] +// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG277]] +// CHECK1-NEXT: [[TMP4:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG277]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG277]] +// CHECK1-NEXT: [[TMP6:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG277]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG277]] +// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP4]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG277]] +// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast i32* [[TMP5]] to i32 addrspace(1)*, !dbg [[DBG277]] +// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast [10 x [10 x i32]]* [[TMP6]] to [10 x [10 x i32]] addrspace(1)*, !dbg [[DBG277]] +// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast i8* [[TMP7]] to i8 addrspace(1)*, !dbg [[DBG277]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug__([10 x [10 x [10 x i32]]] addrspace(1)* [[TMP8]], i32 addrspace(1)* [[TMP9]], [10 x [10 x i32]] addrspace(1)* [[TMP10]], i8 addrspace(1)* [[TMP11]]) #[[ATTR3]], !dbg [[DBG277]] +// CHECK1-NEXT: ret void, !dbg [[DBG277]] // diff --git a/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp --- a/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp @@ -52,292 +52,298 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 // CHECK1-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8**)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[TMP0]], i8** [[TMP1]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[TMP0]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: store i8** [[TMP3]], i8*** [[TMP2]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i8** noundef [[ARGV:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[_TMP5:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x i8*], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP28:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP27:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 // CHECK1-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 9, i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP1]], i64 0 -// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8*, i8** [[TMP5]], i64 9 -// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint i8* [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint i8* [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = add nuw i64 [[TMP10]], 1 -// CHECK1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = call i8* @llvm.stacksave() -// CHECK1-NEXT: store i8* [[TMP13]], i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP11]], align 16 -// CHECK1-NEXT: store i64 [[TMP11]], i64* [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[VLA]], [[TMP14]] +// CHECK1-NEXT: store i32 0, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP4]], i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8*, i8** [[TMP8]], i64 9 +// CHECK1-NEXT: [[TMP9:%.*]] = load i8*, i8** [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP9]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint i8* [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint i8* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP14:%.*]] = add nuw i64 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP16:%.*]] = call i8* @llvm.stacksave() +// CHECK1-NEXT: store i8* [[TMP16]], i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP14]], align 16 +// CHECK1-NEXT: store i64 [[TMP14]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[VLA]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP15:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint i8* [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint i8* [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP20]] -// CHECK1-NEXT: store i8** [[_TMP6]], i8*** [[_TMP5]], align 8 -// CHECK1-NEXT: store i8* [[TMP21]], i8** [[_TMP6]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint i8* [[TMP19]] to i64 +// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint i8* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP22:%.*]] = sub i64 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP23:%.*]] = sdiv exact i64 [[TMP22]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP23]] +// CHECK1-NEXT: store i8** [[_TMP5]], i8*** [[_TMP4]], align 8 +// CHECK1-NEXT: store i8* [[TMP24]], i8** [[_TMP5]], align 8 // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i32* [[ARGC1]] to i8* -// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP25:%.*]] = bitcast i32* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, i64* [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init. to i8*), i8** [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store i8* null, i8** [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb. to i8*), i8** [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: [[TMP31:%.*]] = bitcast i32* [[TMP30]] to i8* -// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP31]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP33:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8*, i8** [[TMP33]], i64 0 -// CHECK1-NEXT: [[TMP34:%.*]] = load i8*, i8** [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, i8* [[TMP34]], i64 0 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = sext i32 [[TMP35]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP36]] -// CHECK1-NEXT: [[TMP37:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8*, i8** [[TMP37]], i64 9 -// CHECK1-NEXT: [[TMP38:%.*]] = load i8*, i8** [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, i8* [[TMP38]], i64 [[LB_ADD_LEN10]] -// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 1 -// CHECK1-NEXT: store i8* [[ARRAYIDX9]], i8** [[TMP39]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint i8* [[ARRAYIDX12]] to i64 -// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint i8* [[ARRAYIDX9]] to i64 -// CHECK1-NEXT: [[TMP42:%.*]] = sub i64 [[TMP40]], [[TMP41]] -// CHECK1-NEXT: [[TMP43:%.*]] = sdiv exact i64 [[TMP42]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP44:%.*]] = add nuw i64 [[TMP43]], 1 -// CHECK1-NEXT: [[TMP45:%.*]] = mul nuw i64 [[TMP44]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP45]], i64* [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 3 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init..1 to i8*), i8** [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 4 -// CHECK1-NEXT: store i8* null, i8** [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 5 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb..2 to i8*), i8** [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 6 -// CHECK1-NEXT: store i32 1, i32* [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[TMP51]], align 4 -// CHECK1-NEXT: [[TMP53:%.*]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]] to i8* -// CHECK1-NEXT: [[TMP54:%.*]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @[[GLOB1]], i32 [[TMP52]], i32 1, i32 2, i8* [[TMP53]]) -// CHECK1-NEXT: store i8* [[TMP54]], i8** [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: [[TMP55:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP56]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP57:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP57]], 9 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i32* [[ARGC]] to i8* +// CHECK1-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP28:%.*]] = bitcast i32* [[TMP2]] to i8* +// CHECK1-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, i64* [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init. to i8*), i8** [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store i8* null, i8** [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb. to i8*), i8** [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to i8* +// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP34]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP36:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8*, i8** [[TMP36]], i64 0 +// CHECK1-NEXT: [[TMP37:%.*]] = load i8*, i8** [[ARRAYIDX7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, i8* [[TMP37]], i64 0 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = sext i32 [[TMP38]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP39]] +// CHECK1-NEXT: [[TMP40:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i8*, i8** [[TMP40]], i64 9 +// CHECK1-NEXT: [[TMP41:%.*]] = load i8*, i8** [[ARRAYIDX10]], align 8 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, i8* [[TMP41]], i64 [[LB_ADD_LEN9]] +// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 +// CHECK1-NEXT: store i8* [[ARRAYIDX8]], i8** [[TMP42]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = ptrtoint i8* [[ARRAYIDX11]] to i64 +// CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint i8* [[ARRAYIDX8]] to i64 +// CHECK1-NEXT: [[TMP45:%.*]] = sub i64 [[TMP43]], [[TMP44]] +// CHECK1-NEXT: [[TMP46:%.*]] = sdiv exact i64 [[TMP45]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP47:%.*]] = add nuw i64 [[TMP46]], 1 +// CHECK1-NEXT: [[TMP48:%.*]] = mul nuw i64 [[TMP47]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP48]], i64* [[TMP49]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init..1 to i8*), i8** [[TMP50]], align 8 +// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 +// CHECK1-NEXT: store i8* null, i8** [[TMP51]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb..2 to i8*), i8** [[TMP52]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 +// CHECK1-NEXT: store i32 1, i32* [[TMP53]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, i32* [[TMP54]], align 4 +// CHECK1-NEXT: [[TMP56:%.*]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]] to i8* +// CHECK1-NEXT: [[TMP57:%.*]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @[[GLOB1]], i32 [[TMP55]], i32 1, i32 2, i8* [[TMP56]]) +// CHECK1-NEXT: store i8* [[TMP57]], i8** [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = load i32, i32* [[TMP58]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP59]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP60:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP60]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP58:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP58]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP61]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP59]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP62]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP60:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 [[TMP60]], [[TMP61]] -// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP63:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP63]], [[TMP64]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP62:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP62]], 1 +// CHECK1-NEXT: [[TMP65:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP65]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL]] // CHECK1-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store i8** [[DOTTASK_RED_]], i8*** [[TMP63]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[ARGC1]], i32** [[TMP64]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP66:%.*]] = load i8**, i8*** [[_TMP5]], align 8 -// CHECK1-NEXT: store i8** [[TMP66]], i8*** [[TMP65]], align 8 -// CHECK1-NEXT: [[TMP67:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP68:%.*]] = load i32, i32* [[TMP67]], align 4 -// CHECK1-NEXT: [[TMP69:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP68]], i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP70:%.*]] = bitcast i8* [[TMP69]] to %struct.kmp_task_t_with_privates* -// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP70]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP71]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP73:%.*]] = load i8*, i8** [[TMP72]], align 8 -// CHECK1-NEXT: [[TMP74:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP73]], i8* align 8 [[TMP74]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP70]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP75]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP77:%.*]] = load i8*, i8** [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: store i8* [[TMP77]], i8** [[TMP76]], align 8 -// CHECK1-NEXT: [[TMP78:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP79:%.*]] = load i32, i32* [[TMP78]], align 4 -// CHECK1-NEXT: [[TMP80:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP79]], i8* [[TMP69]]) +// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store i8** [[DOTTASK_RED_]], i8*** [[TMP66]], align 8 +// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[ARGC]], i32** [[TMP67]], align 8 +// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP69:%.*]] = load i8**, i8*** [[_TMP4]], align 8 +// CHECK1-NEXT: store i8** [[TMP69]], i8*** [[TMP68]], align 8 +// CHECK1-NEXT: [[TMP70:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP71:%.*]] = load i32, i32* [[TMP70]], align 4 +// CHECK1-NEXT: [[TMP72:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP71]], i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP73:%.*]] = bitcast i8* [[TMP72]] to %struct.kmp_task_t_with_privates* +// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP73]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP74]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP76:%.*]] = load i8*, i8** [[TMP75]], align 8 +// CHECK1-NEXT: [[TMP77:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP76]], i8* align 8 [[TMP77]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP73]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP78]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP80:%.*]] = load i8*, i8** [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: store i8* [[TMP80]], i8** [[TMP79]], align 8 +// CHECK1-NEXT: [[TMP81:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP82:%.*]] = load i32, i32* [[TMP81]], align 4 +// CHECK1-NEXT: [[TMP83:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP82]], i8* [[TMP72]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP81:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP81]], 1 -// CHECK1-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP84:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP84]], 1 +// CHECK1-NEXT: store i64 [[ADD13]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP82:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP83:%.*]] = load i32, i32* [[TMP82]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP83]]) -// CHECK1-NEXT: [[TMP84:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP85:%.*]] = load i32, i32* [[TMP84]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP85]], i32 1) -// CHECK1-NEXT: [[TMP86:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP87:%.*]] = bitcast i32* [[ARGC1]] to i8* -// CHECK1-NEXT: store i8* [[TMP87]], i8** [[TMP86]], align 8 -// CHECK1-NEXT: [[TMP88:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP88]], align 8 -// CHECK1-NEXT: [[TMP89:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP90:%.*]] = inttoptr i64 [[TMP11]] to i8* +// CHECK1-NEXT: [[TMP85:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP86:%.*]] = load i32, i32* [[TMP85]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP86]]) +// CHECK1-NEXT: [[TMP87:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP88:%.*]] = load i32, i32* [[TMP87]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP88]], i32 1) +// CHECK1-NEXT: [[TMP89:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP90:%.*]] = bitcast i32* [[ARGC]] to i8* // CHECK1-NEXT: store i8* [[TMP90]], i8** [[TMP89]], align 8 -// CHECK1-NEXT: [[TMP91:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP92:%.*]] = load i32, i32* [[TMP91]], align 4 -// CHECK1-NEXT: [[TMP93:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP94:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP92]], i32 2, i64 24, i8* [[TMP93]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP94]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP91]], align 8 +// CHECK1-NEXT: [[TMP92:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP93:%.*]] = inttoptr i64 [[TMP14]] to i8* +// CHECK1-NEXT: store i8* [[TMP93]], i8** [[TMP92]], align 8 +// CHECK1-NEXT: [[TMP94:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP95:%.*]] = load i32, i32* [[TMP94]], align 4 +// CHECK1-NEXT: [[TMP96:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP97:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP95]], i32 2, i64 24, i8* [[TMP96]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP97]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP95:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP96:%.*]] = load i32, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP95]], [[TMP96]] -// CHECK1-NEXT: store i32 [[ADD15]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP97:%.*]] = getelementptr i8, i8* [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[ARRAYIDX2]], [[TMP97]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE22:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP98:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP99:%.*]] = load i32, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP98]], [[TMP99]] +// CHECK1-NEXT: store i32 [[ADD14]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP100:%.*]] = getelementptr i8, i8* [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[ARRAYIDX1]], [[TMP100]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi i8* [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP98:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP98]] to i32 -// CHECK1-NEXT: [[TMP99:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP99]] to i32 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV]], [[CONV17]] -// CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK1-NEXT: store i8 [[CONV19]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi i8* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP101:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP101]] to i32 +// CHECK1-NEXT: [[TMP102:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV16:%.*]] = sext i8 [[TMP102]] to i32 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV]], [[CONV16]] +// CHECK1-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK1-NEXT: store i8 [[CONV18]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP97]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done22: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP92]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP100]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done21: +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP95]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP100:%.*]] = load i32, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP101:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP100]] monotonic, align 4 -// CHECK1-NEXT: [[TMP102:%.*]] = getelementptr i8, i8* [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY23:%.*]] = icmp eq i8* [[ARRAYIDX2]], [[TMP102]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY23]], label [[OMP_ARRAYCPY_DONE36:%.*]], label [[OMP_ARRAYCPY_BODY24:%.*]] -// CHECK1: omp.arraycpy.body24: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST25:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT34:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST26:%.*]] = phi i8* [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT33:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP103:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV27:%.*]] = sext i8 [[TMP103]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST26]] monotonic, align 1 +// CHECK1-NEXT: [[TMP103:%.*]] = load i32, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP104:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP103]] monotonic, align 4 +// CHECK1-NEXT: [[TMP105:%.*]] = getelementptr i8, i8* [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY22:%.*]] = icmp eq i8* [[ARRAYIDX1]], [[TMP105]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY22]], label [[OMP_ARRAYCPY_DONE35:%.*]], label [[OMP_ARRAYCPY_BODY23:%.*]] +// CHECK1: omp.arraycpy.body23: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST24:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT33:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST25:%.*]] = phi i8* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], [[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[TMP106:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 +// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP106]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST25]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP104:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY24]] ], [ [[TMP109:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP104]], i8* [[_TMP28]], align 1 -// CHECK1-NEXT: [[TMP105:%.*]] = load i8, i8* [[_TMP28]], align 1 -// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP105]] to i32 -// CHECK1-NEXT: [[TMP106:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV30:%.*]] = sext i8 [[TMP106]] to i32 -// CHECK1-NEXT: [[ADD31:%.*]] = add nsw i32 [[CONV29]], [[CONV30]] -// CHECK1-NEXT: [[CONV32:%.*]] = trunc i32 [[ADD31]] to i8 -// CHECK1-NEXT: store i8 [[CONV32]], i8* [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP107:%.*]] = load i8, i8* [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP108:%.*]] = cmpxchg i8* [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i8 [[TMP104]], i8 [[TMP107]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP109]] = extractvalue { i8, i1 } [[TMP108]], 0 -// CHECK1-NEXT: [[TMP110:%.*]] = extractvalue { i8, i1 } [[TMP108]], 1 -// CHECK1-NEXT: br i1 [[TMP110]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP107:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY23]] ], [ [[TMP112:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP107]], i8* [[_TMP27]], align 1 +// CHECK1-NEXT: [[TMP108:%.*]] = load i8, i8* [[_TMP27]], align 1 +// CHECK1-NEXT: [[CONV28:%.*]] = sext i8 [[TMP108]] to i32 +// CHECK1-NEXT: [[TMP109:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 +// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP109]] to i32 +// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV28]], [[CONV29]] +// CHECK1-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 +// CHECK1-NEXT: store i8 [[CONV31]], i8* [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP110:%.*]] = load i8, i8* [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP111:%.*]] = cmpxchg i8* [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i8 [[TMP107]], i8 [[TMP110]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP112]] = extractvalue { i8, i1 } [[TMP111]], 0 +// CHECK1-NEXT: [[TMP113:%.*]] = extractvalue { i8, i1 } [[TMP111]], 1 +// CHECK1-NEXT: br i1 [[TMP113]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT33]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT34]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST25]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE35:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT33]], [[TMP102]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_DONE36]], label [[OMP_ARRAYCPY_BODY24]] -// CHECK1: omp.arraycpy.done36: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT33]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST24]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE34:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP105]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_BODY23]] +// CHECK1: omp.arraycpy.done35: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP111:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP111]]) +// CHECK1-NEXT: [[TMP114:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP114]]) // CHECK1-NEXT: ret void // // @@ -447,7 +453,7 @@ // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[TMP_I:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[TMP4_I:%.*]] = alloca i8*, align 8 @@ -461,7 +467,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -474,29 +480,29 @@ // CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* // CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* // CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP24:%.*]] = load i8**, i8*** [[TMP23]], align 8 // CHECK1-NEXT: [[TMP25:%.*]] = load i8*, i8** [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP26]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 // CHECK1-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP29]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP31:%.*]] = load i8**, i8*** [[TMP30]], align 8 // CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds i8*, i8** [[TMP31]], i64 9 // CHECK1-NEXT: [[TMP32:%.*]] = load i8*, i8** [[ARRAYIDX2_I]], align 8 @@ -507,10 +513,10 @@ // CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 // CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8 +// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8, !noalias !12 // CHECK1-NEXT: [[TMP39:%.*]] = load i8*, i8** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP40:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP39]], i8* [[TMP25]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP42:%.*]] = load i8**, i8*** [[TMP41]], align 8 // CHECK1-NEXT: [[TMP43:%.*]] = load i8*, i8** [[TMP42]], align 8 // CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint i8* [[TMP43]] to i64 diff --git a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp --- a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp @@ -311,7 +311,7 @@ // CHECK1-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 // CHECK1-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK1-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[K_CASTED:%.*]] = alloca i64, align 8 @@ -578,15 +578,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96 // CHECK1-SAME: () #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -596,52 +598,54 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 5, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK1-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 33, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -657,7 +661,7 @@ // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 @@ -668,7 +672,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) @@ -679,8 +683,8 @@ // CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !25 // CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !25 // CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !25 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !25 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !25 +// CHECK1-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !25 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !25 // CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 1, i32 0, i32 0, i8* null, i32 0, i8* null) #[[ATTR4]] // CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK1-NEXT: br i1 [[TMP12]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] @@ -696,29 +700,28 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[K_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[K_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[K]], i64* [[K_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[K_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[TMP2]], i64* [[K_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[K_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[K_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[TMP3]], i64* [[TMP2]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[K:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[K_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTLINEAR_START:%.*]] = alloca i64, align 8 @@ -730,73 +733,78 @@ // CHECK1-NEXT: [[K1:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[K]], i64* [[K_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[K_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[TMP0]], i64* [[DOTLINEAR_START]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[K]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[K]], align 8 +// CHECK1-NEXT: store i64 [[TMP5]], i64* [[DOTLINEAR_START]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 8, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP2]]) -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 35, i32 0, i32 8, i32 1, i32 1) +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP7]]) +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32 35, i32 0, i32 8, i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] // CHECK1-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !26 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP9]], 3 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i32 [[MUL2]] to i64 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV3]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP14]], 3 +// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] to i64 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP13]], [[CONV]] // CHECK1-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !26 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[A]], align 4, !llvm.access.group !26 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: -// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[K1]], align 8 -// CHECK1-NEXT: store i64 [[TMP16]], i64* [[K_ADDR]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[K1]], align 8 +// CHECK1-NEXT: store i64 [[TMP21]], i64* [[K]], align 8 // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: // CHECK1-NEXT: ret void @@ -808,145 +816,146 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[LIN_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[LIN]], i64* [[LIN_ADDR]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP3]], i64 [[TMP5]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK1-NEXT: store i16 [[TMP1]], i16* [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[LIN:%.*]], i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTLINEAR_START3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTLINEAR_START1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTLINEAR_STEP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[IT:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[LIN4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[A5:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[LIN2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[LIN]], i64* [[LIN_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 4 +// CHECK1-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[LIN]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[LIN]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTLINEAR_START]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTLINEAR_START1]], align 4 // CHECK1-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() // CHECK1-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 // CHECK1-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) -// CHECK1-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP6]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !29 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !29 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 -// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK1-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK1-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !29 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4, !llvm.access.group !29 -// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !29 -// CHECK1-NEXT: [[MUL8:%.*]] = mul i64 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: [[ADD:%.*]] = add i64 [[CONV7]], [[MUL8]] -// CHECK1-NEXT: [[CONV9:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK1-NEXT: store i32 [[CONV9]], i32* [[LIN4]], align 4, !llvm.access.group !29 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START3]], align 4, !llvm.access.group !29 -// CHECK1-NEXT: [[CONV10:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !29 -// CHECK1-NEXT: [[MUL11:%.*]] = mul i64 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] -// CHECK1-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 -// CHECK1-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4, !llvm.access.group !29 -// CHECK1-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !29 -// CHECK1-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 -// CHECK1-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK1-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !29 +// CHECK1-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] +// CHECK1-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK1-NEXT: store i32 [[CONV6]], i32* [[LIN2]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !29 +// CHECK1-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] +// CHECK1-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 +// CHECK1-NEXT: store i32 [[CONV10]], i32* [[A3]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP23:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !29 +// CHECK1-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 +// CHECK1-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 +// CHECK1-NEXT: store i16 [[CONV13]], i16* [[AA]], align 2, !llvm.access.group !29 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 -// CHECK1-NEXT: [[ADD17:%.*]] = add i64 [[TMP17]], 1 -// CHECK1-NEXT: store i64 [[ADD17]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK1-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 +// CHECK1-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i64 400, i64* [[IT]], align 8 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK1-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK1-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[LIN2]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], i32* [[LIN]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[A3]], align 4 +// CHECK1-NEXT: store i32 [[TMP30]], i32* [[A]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: // CHECK1-NEXT: ret void @@ -957,31 +966,29 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK1-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -991,65 +998,69 @@ // CHECK1-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 3, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2, !llvm.access.group !32 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !32 -// CHECK1-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 -// CHECK1-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK1-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2, !llvm.access.group !32 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK1-NEXT: store i16 [[CONV]], i16* [[IT]], align 2, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !32 +// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 +// CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 +// CHECK1-NEXT: store i16 [[CONV5]], i16* [[AA]], align 2, !llvm.access.group !32 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i16 22, i16* [[IT]], align 2 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1070,8 +1081,7 @@ // CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -1092,33 +1102,40 @@ // CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 -// CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 10, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, [10 x float]*, i64, float*, [5 x [10 x double]]*, i64, i64, double*, %struct.TT*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP9]], [10 x float]* [[TMP0]], i64 [[TMP1]], float* [[TMP2]], [5 x [10 x double]]* [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], double* [[TMP6]], %struct.TT* [[TMP7]], i64 [[TMP11]]) +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [10 x float]* [[TMP0]], [10 x float]** [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store float* [[TMP2]], float** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store [5 x [10 x double]]* [[TMP3]], [5 x [10 x double]]** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK1-NEXT: store i64 [[TMP5]], i64* [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK1-NEXT: store double* [[TMP6]], double** [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK1-NEXT: store %struct.TT* [[TMP7]], %struct.TT** [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 10 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV5]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1128,127 +1145,131 @@ // CHECK1-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8 -// CHECK1-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8 -// CHECK1-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load [10 x float]*, [10 x float]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load double*, double** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 9 +// CHECK1-NEXT: [[TMP18:%.*]] = load %struct.TT*, %struct.TT** [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 10 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 8 +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] -// CHECK1-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 -// CHECK1-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1, !llvm.access.group !35 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double -// CHECK1-NEXT: [[ADD10:%.*]] = fadd double [[CONV9]], 1.000000e+00 -// CHECK1-NEXT: [[CONV11:%.*]] = fptrunc double [[ADD10]] to float -// CHECK1-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK1-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[CONV13:%.*]] = fpext float [[TMP21]] to double -// CHECK1-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.000000e+00 -// CHECK1-NEXT: [[CONV15:%.*]] = fptrunc double [[ADD14]] to float -// CHECK1-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX16]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !35 -// CHECK1-NEXT: [[ADD18:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !35 -// CHECK1-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK1-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP23]] -// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX19]], i64 3 -// CHECK1-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8, !llvm.access.group !35 -// CHECK1-NEXT: [[ADD21:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8, !llvm.access.group !35 -// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !35 -// CHECK1-NEXT: [[ADD22:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK1-NEXT: store i64 [[ADD22]], i64* [[X]], align 8, !llvm.access.group !35 -// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !35 -// CHECK1-NEXT: [[CONV23:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[CONV23]], 1 -// CHECK1-NEXT: [[CONV25:%.*]] = trunc i32 [[ADD24]] to i8 -// CHECK1-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 +// CHECK1-NEXT: store i8 [[CONV]], i8* [[IT]], align 1, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[A]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP33:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK1-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK1-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK1-NEXT: store float [[CONV5]], float* [[ARRAYIDX]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 3 +// CHECK1-NEXT: [[TMP34:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double +// CHECK1-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 +// CHECK1-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float +// CHECK1-NEXT: store float [[CONV9]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP10]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX10]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP35:%.*]] = load double, double* [[ARRAYIDX11]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD12]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP36:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP16]], i64 [[TMP36]] +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX13]], i64 3 +// CHECK1-NEXT: [[TMP37:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP38:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK1-NEXT: store i64 [[ADD16]], i64* [[X]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP39:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK1-NEXT: store i8 [[CONV19]], i8* [[Y]], align 8, !llvm.access.group !35 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK1-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK1-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK1-NEXT: store i32 [[ADD27]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK1-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK1-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK1-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK1-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK1-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i8 96, i8* [[IT]], align 1 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1557,7 +1578,7 @@ // CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -1568,24 +1589,28 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.S1* [[TMP0]], %struct.S1** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store i16* [[TMP3]], i16** [[TMP9]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -1595,76 +1620,79 @@ // CHECK1-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i16*, i16** [[TMP9]], align 8 // CHECK1-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP6]], 3 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP13]], 3 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP8]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !38 -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !38 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !38 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !38 -// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !38 +// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP18]], 400 // CHECK1-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK1-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !38 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !38 -// CHECK1-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double -// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[B]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double +// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double [[ADD]], double* [[A]], align 8, !llvm.access.group !38 -// CHECK1-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load double, double* [[A5]], align 8, !llvm.access.group !38 -// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK1-NEXT: store double [[INC]], double* [[A5]], align 8, !llvm.access.group !38 -// CHECK1-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 -// CHECK1-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP14]] -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 -// CHECK1-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2, !llvm.access.group !38 +// CHECK1-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = load double, double* [[A2]], align 8, !llvm.access.group !38 +// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK1-NEXT: store double [[INC]], double* [[A2]], align 8, !llvm.access.group !38 +// CHECK1-NEXT: [[CONV3:%.*]] = fptosi double [[INC]] to i16 +// CHECK1-NEXT: [[TMP21:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i64 [[TMP21]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// CHECK1-NEXT: store i16 [[CONV3]], i16* [[ARRAYIDX4]], align 2, !llvm.access.group !38 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !38 -// CHECK1-NEXT: [[ADD8:%.*]] = add i64 [[TMP15]], 1 -// CHECK1-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !38 +// CHECK1-NEXT: [[ADD5:%.*]] = add i64 [[TMP22]], 1 +// CHECK1-NEXT: store i64 [[ADD5]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !38 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i64 400, i64* [[IT]], align 8 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1679,9 +1707,7 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 @@ -1690,43 +1716,47 @@ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 -// CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* -// CHECK1-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV2]], align 1 +// CHECK1-NEXT: store i8 [[TMP6]], i8* [[TMP5]], align 2 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP7]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 -// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 2 +// CHECK1-NEXT: store i8 [[TMP6]], i8* [[AAA]], align 1 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP7]], align 8 // CHECK1-NEXT: ret void // // @@ -1736,34 +1766,33 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP5]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -1773,70 +1802,74 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK1-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 6, i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP5]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !41 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !41 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !41 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !41 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !41 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK1-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !41 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !41 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !41 -// CHECK1-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !41 -// CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 -// CHECK1-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !41 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !41 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP16:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !41 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK1-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2, !llvm.access.group !41 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !41 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !41 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i64 [[TMP12]], 1 -// CHECK1-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !41 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 +// CHECK1-NEXT: store i64 [[ADD6]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !41 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i64 11, i64* [[I]], align 8 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1869,7 +1902,7 @@ // CHECK3-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 // CHECK3-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK3-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[LIN:%.*]] = alloca i32, align 4 @@ -2126,15 +2159,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96 // CHECK3-SAME: () #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2144,52 +2179,54 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 5, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK3-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK3-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 33, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2205,7 +2242,7 @@ // CHECK3-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 4 // CHECK3-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 4 // CHECK3-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 4 // CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 @@ -2216,7 +2253,7 @@ // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK3-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* // CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) // CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) @@ -2227,8 +2264,8 @@ // CHECK3-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !26 // CHECK3-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !26 // CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !26 -// CHECK3-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !26 -// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !26 +// CHECK3-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 4, !noalias !26 +// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 4, !noalias !26 // CHECK3-NEXT: [[TMP11:%.*]] = call i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 1, i32 0, i32 0, i8* null, i32 0, i8* null) #[[ATTR4]] // CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK3-NEXT: br i1 [[TMP12]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] @@ -2244,24 +2281,26 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[K_ADDR:%.*]] = alloca i64*, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i64* [[K]], i64** [[K_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i64*, i64** [[K_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i64*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32 [[TMP2]], i64* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i64* [[TMP0]], i64** [[TMP3]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i64* noundef nonnull align 4 dereferenceable(8) [[K:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[K_ADDR:%.*]] = alloca i64*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTLINEAR_START:%.*]] = alloca i64, align 8 @@ -2270,76 +2309,80 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[K1:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i64* [[K]], i64** [[K_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64*, i64** [[K_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 8 -// CHECK3-NEXT: store i64 [[TMP1]], i64* [[DOTLINEAR_START]], align 8 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK3-NEXT: store i64 [[TMP5]], i64* [[DOTLINEAR_START]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 8, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP3]]) -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 35, i32 0, i32 8, i32 1, i32 1) +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP7]]) +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32 35, i32 0, i32 8, i32 1, i32 1) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP4]], 0 +// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] // CHECK3-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !27 -// CHECK3-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !27 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK3-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP10]], 3 -// CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] to i64 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP9]], [[CONV]] -// CHECK3-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !27 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !27 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !27 +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !27 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK3-NEXT: [[MUL1:%.*]] = mul nsw i32 [[TMP14]], 3 +// CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[MUL1]] to i64 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP13]], [[CONV]] +// CHECK3-NEXT: store i64 [[ADD]], i64* [[K]], align 8, !llvm.access.group !27 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !27 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !27 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 1, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK3: .omp.linear.pu: -// CHECK3-NEXT: [[TMP17:%.*]] = load i64, i64* [[K1]], align 8 -// CHECK3-NEXT: store i64 [[TMP17]], i64* [[TMP0]], align 8 +// CHECK3-NEXT: [[TMP21:%.*]] = load i64, i64* [[K]], align 8 +// CHECK3-NEXT: store i64 [[TMP21]], i64* [[TMP4]], align 8 // CHECK3-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK3: .omp.linear.pu.done: // CHECK3-NEXT: ret void @@ -2351,35 +2394,33 @@ // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[LIN_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[LIN_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[LIN_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[LIN_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP1]], i32 [[TMP3]], i32 [[TMP5]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK3-NEXT: store i16 [[TMP1]], i16* [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[LIN_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[LIN:%.*]], i32 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK3-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -2394,96 +2435,103 @@ // CHECK3-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[LIN_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START1]], align 4 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 4 +// CHECK3-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[LIN]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[LIN]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTLINEAR_START1]], align 4 // CHECK3-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() // CHECK3-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 // CHECK3-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK3-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK3-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) -// CHECK3-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]]) +// CHECK3-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK3-NEXT: store i64 [[TMP6]], i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP13]], i64* [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 -// CHECK3-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !30 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !30 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 -// CHECK3-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK3-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 +// CHECK3-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK3-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK3-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !30 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4, !llvm.access.group !30 -// CHECK3-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !30 -// CHECK3-NEXT: [[MUL6:%.*]] = mul i64 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: [[ADD:%.*]] = add i64 [[CONV5]], [[MUL6]] -// CHECK3-NEXT: [[CONV7:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK3-NEXT: store i32 [[CONV7]], i32* [[LIN2]], align 4, !llvm.access.group !30 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4, !llvm.access.group !30 -// CHECK3-NEXT: [[CONV8:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK3-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !30 -// CHECK3-NEXT: [[MUL9:%.*]] = mul i64 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] -// CHECK3-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 -// CHECK3-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4, !llvm.access.group !30 -// CHECK3-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !30 -// CHECK3-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 -// CHECK3-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK3-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !30 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK3-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 +// CHECK3-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !30 +// CHECK3-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] +// CHECK3-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK3-NEXT: store i32 [[CONV6]], i32* [[LIN2]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK3-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 +// CHECK3-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !30 +// CHECK3-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] +// CHECK3-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 +// CHECK3-NEXT: store i32 [[CONV10]], i32* [[A3]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: [[TMP23:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !30 +// CHECK3-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 +// CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 +// CHECK3-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 +// CHECK3-NEXT: store i16 [[CONV13]], i16* [[AA]], align 2, !llvm.access.group !30 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 -// CHECK3-NEXT: [[ADD15:%.*]] = add i64 [[TMP17]], 1 -// CHECK3-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 +// CHECK3-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 +// CHECK3-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 +// CHECK3-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i64 400, i64* [[IT]], align 8 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK3-NEXT: br i1 [[TMP28]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK3: .omp.linear.pu: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[LIN2]], align 4 -// CHECK3-NEXT: store i32 [[TMP22]], i32* [[LIN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[A3]], align 4 -// CHECK3-NEXT: store i32 [[TMP23]], i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[LIN2]], align 4 +// CHECK3-NEXT: store i32 [[TMP29]], i32* [[LIN]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[A3]], align 4 +// CHECK3-NEXT: store i32 [[TMP30]], i32* [[A]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK3: .omp.linear.pu.done: // CHECK3-NEXT: ret void @@ -2494,29 +2542,28 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32 [[TMP1]], i32 [[TMP3]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK3-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2526,64 +2573,69 @@ // CHECK3-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 3, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] -// CHECK3-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK3-NEXT: store i16 [[CONV2]], i16* [[IT]], align 2, !llvm.access.group !33 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !33 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !33 -// CHECK3-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !33 -// CHECK3-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 -// CHECK3-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK3-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2, !llvm.access.group !33 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK3-NEXT: store i16 [[CONV]], i16* [[IT]], align 2, !llvm.access.group !33 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: [[TMP14:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !33 +// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 +// CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 +// CHECK3-NEXT: store i16 [[CONV5]], i16* [[AA]], align 2, !llvm.access.group !33 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i16 22, i16* [[IT]], align 2 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2604,8 +2656,7 @@ // CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -2624,31 +2675,40 @@ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 10, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [10 x float]*, i32, float*, [5 x [10 x double]]*, i32, i32, double*, %struct.TT*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP9]], [10 x float]* [[TMP0]], i32 [[TMP1]], float* [[TMP2]], [5 x [10 x double]]* [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], double* [[TMP6]], %struct.TT* [[TMP7]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store [10 x float]* [[TMP0]], [10 x float]** [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[TMP2]], float** [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store [5 x [10 x double]]* [[TMP3]], [5 x [10 x double]]** [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK3-NEXT: store double* [[TMP6]], double** [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK3-NEXT: store %struct.TT* [[TMP7]], %struct.TT** [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2658,125 +2718,131 @@ // CHECK3-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4 -// CHECK3-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4 -// CHECK3-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load [10 x float]*, [10 x float]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 7 +// CHECK3-NEXT: [[TMP16:%.*]] = load double*, double** [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load %struct.TT*, %struct.TT** [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 9 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !36 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK3-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 // CHECK3-NEXT: store i8 [[CONV]], i8* [[IT]], align 1, !llvm.access.group !36 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !36 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4, !llvm.access.group !36 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !36 -// CHECK3-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[A]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP33:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK3-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK3-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK3-NEXT: store float [[CONV5]], float* [[ARRAYIDX]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP8]], i32 3 +// CHECK3-NEXT: [[TMP34:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double // CHECK3-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK3-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK3-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !36 -// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK3-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !36 -// CHECK3-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK3-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK3-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK3-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !36 -// CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !36 -// CHECK3-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !36 -// CHECK3-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK3-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP23]] -// CHECK3-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i32 3 -// CHECK3-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !36 -// CHECK3-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !36 -// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !36 -// CHECK3-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK3-NEXT: store i64 [[ADD20]], i64* [[X]], align 4, !llvm.access.group !36 -// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !36 -// CHECK3-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK3-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK3-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK3-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: store float [[CONV9]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP10]], i32 0, i32 1 +// CHECK3-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX10]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP35:%.*]] = load double, double* [[ARRAYIDX11]], align 8, !llvm.access.group !36 +// CHECK3-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD12]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !36 +// CHECK3-NEXT: [[TMP36:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK3-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP16]], i32 [[TMP36]] +// CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX13]], i32 3 +// CHECK3-NEXT: [[TMP37:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !36 +// CHECK3-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8, !llvm.access.group !36 +// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP18]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP38:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK3-NEXT: store i64 [[ADD16]], i64* [[X]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP18]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP39:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK3-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK3-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK3-NEXT: store i8 [[CONV19]], i8* [[Y]], align 4, !llvm.access.group !36 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK3-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK3-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK3-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK3-NEXT: store i32 [[ADD25]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK3-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK3-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK3-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK3-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK3-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK3-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i8 96, i8* [[IT]], align 1 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3082,7 +3148,7 @@ // CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -3092,23 +3158,28 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.S1* [[TMP0]], %struct.S1** [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store i16* [[TMP3]], i16** [[TMP9]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -3118,75 +3189,79 @@ // CHECK3-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i16*, i16** [[TMP9]], align 4 // CHECK3-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK3-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK3-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP6]], 3 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP13]], 3 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK3-NEXT: store i64 [[TMP8]], i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP15]], i64* [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !39 -// CHECK3-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !39 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !39 +// CHECK3-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !39 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !39 -// CHECK3-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 +// CHECK3-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !39 +// CHECK3-NEXT: [[MUL:%.*]] = mul i64 [[TMP18]], 400 // CHECK3-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK3-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !39 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[B_ADDR]], align 4, !llvm.access.group !39 -// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[B]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double // CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double [[ADD]], double* [[A]], align 4, !llvm.access.group !39 -// CHECK3-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load double, double* [[A4]], align 4, !llvm.access.group !39 -// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK3-NEXT: store double [[INC]], double* [[A4]], align 4, !llvm.access.group !39 -// CHECK3-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK3-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP14]] -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 -// CHECK3-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2, !llvm.access.group !39 +// CHECK3-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP20:%.*]] = load double, double* [[A2]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK3-NEXT: store double [[INC]], double* [[A2]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: [[CONV3:%.*]] = fptosi double [[INC]] to i16 +// CHECK3-NEXT: [[TMP21:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i32 [[TMP21]] +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// CHECK3-NEXT: store i16 [[CONV3]], i16* [[ARRAYIDX4]], align 2, !llvm.access.group !39 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !39 -// CHECK3-NEXT: [[ADD7:%.*]] = add i64 [[TMP15]], 1 -// CHECK3-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !39 +// CHECK3-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !39 +// CHECK3-NEXT: [[ADD5:%.*]] = add i64 [[TMP22]], 1 +// CHECK3-NEXT: store i64 [[ADD5]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !39 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i64 400, i64* [[IT]], align 8 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3201,9 +3276,7 @@ // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 @@ -3211,41 +3284,47 @@ // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* // CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 -// CHECK3-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* -// CHECK3-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV1]], align 1 +// CHECK3-NEXT: store i8 [[TMP6]], i8* [[TMP5]], align 2 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP7]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 -// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 2 +// CHECK3-NEXT: store i8 [[TMP6]], i8* [[AAA]], align 1 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -3255,32 +3334,32 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP5]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -3290,69 +3369,74 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK3-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK3-NEXT: store i64 6, i64* [[DOTOMP_UB]], align 8 // CHECK3-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK3-NEXT: store i64 [[TMP5]], i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !42 -// CHECK3-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !42 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !42 +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !42 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !42 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK3-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !42 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK3-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !42 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !42 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !42 -// CHECK3-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !42 -// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 -// CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !42 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !42 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: [[TMP16:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !42 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK3-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2, !llvm.access.group !42 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !42 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !42 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i64 [[TMP12]], 1 -// CHECK3-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !42 +// CHECK3-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !42 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 +// CHECK3-NEXT: store i64 [[ADD6]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !42 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i64 11, i64* [[I]], align 8 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3385,7 +3469,7 @@ // CHECK5-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 // CHECK5-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 -// CHECK5-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK5-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK5-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[K_CASTED:%.*]] = alloca i64, align 8 @@ -3652,15 +3736,17 @@ // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96 // CHECK5-SAME: () #[[ATTR2:[0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3670,52 +3756,54 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 5, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK5-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK5-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 33, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3731,7 +3819,7 @@ // CHECK5-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK5-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK5-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK5-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8 // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 @@ -3742,7 +3830,7 @@ // CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK5-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK5-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK5-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK5-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* // CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) // CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) @@ -3753,8 +3841,8 @@ // CHECK5-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !25 // CHECK5-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !25 // CHECK5-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !25 -// CHECK5-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !25 -// CHECK5-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !25 +// CHECK5-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !25 +// CHECK5-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !25 // CHECK5-NEXT: [[TMP11:%.*]] = call i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 1, i32 0, i32 0, i8* null, i32 0, i8* null) #[[ATTR4]] // CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK5-NEXT: br i1 [[TMP12]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] @@ -3770,29 +3858,28 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[K_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[K_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK5-NEXT: store i64 [[K]], i64* [[K_ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[K_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[TMP2]], i64* [[K_CASTED]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i64, i64* [[K_CASTED]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP3]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK5-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load i64, i64* [[K_ADDR]], align 8 +// CHECK5-NEXT: store i64 [[TMP3]], i64* [[TMP2]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[K:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[K_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTLINEAR_START:%.*]] = alloca i64, align 8 @@ -3804,73 +3891,78 @@ // CHECK5-NEXT: [[K1:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[K]], i64* [[K_ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, i64* [[K_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[TMP0]], i64* [[DOTLINEAR_START]], align 8 +// CHECK5-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], i64* [[K]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[K]], align 8 +// CHECK5-NEXT: store i64 [[TMP5]], i64* [[DOTLINEAR_START]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 8, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP2]]) -// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1073741859, i32 0, i32 8, i32 1, i32 1) +// CHECK5-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP7]]) +// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32 1073741859, i32 0, i32 8, i32 1, i32 1) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK5-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] // CHECK5-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !26 -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !26 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK5-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP9]], 3 -// CHECK5-NEXT: [[CONV3:%.*]] = sext i32 [[MUL2]] to i64 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV3]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !26 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK5-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP14]], 3 +// CHECK5-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] to i64 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP13]], [[CONV]] // CHECK5-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !26 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !26 -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK5-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !26 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !26 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK5-NEXT: store i32 [[ADD3]], i32* [[A]], align 4, !llvm.access.group !26 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK5-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK5-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK5-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK5-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 1, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK5-NEXT: br i1 [[TMP20]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK5: .omp.linear.pu: -// CHECK5-NEXT: [[TMP16:%.*]] = load i64, i64* [[K1]], align 8 -// CHECK5-NEXT: store i64 [[TMP16]], i64* [[K_ADDR]], align 8 +// CHECK5-NEXT: [[TMP21:%.*]] = load i64, i64* [[K1]], align 8 +// CHECK5-NEXT: store i64 [[TMP21]], i64* [[K]], align 8 // CHECK5-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK5: .omp.linear.pu.done: // CHECK5-NEXT: ret void @@ -3882,145 +3974,146 @@ // CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[LIN_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK5-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK5-NEXT: store i64 [[LIN]], i64* [[LIN_ADDR]], align 8 // CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK5-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK5-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK5-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 -// CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP3]], i64 [[TMP5]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK5-NEXT: store i16 [[TMP1]], i16* [[TMP0]], align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK5-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[LIN:%.*]], i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK5-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK5-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTLINEAR_START3:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTLINEAR_START1:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTLINEAR_STEP:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[IT:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[LIN4:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[A5:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[LIN2:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[LIN]], i64* [[LIN_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* -// CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 +// CHECK5-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 4 +// CHECK5-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: store i32 [[TMP4]], i32* [[LIN]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK5-NEXT: store i32 [[TMP6]], i32* [[A]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[LIN]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTLINEAR_START]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTLINEAR_START1]], align 4 // CHECK5-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() // CHECK5-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 // CHECK5-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK5-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK5-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) -// CHECK5-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]]) +// CHECK5-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK5-NEXT: store i64 [[TMP6]], i64* [[DOTOMP_IV]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP13]], i64* [[DOTOMP_IV]], align 8 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !29 -// CHECK5-NEXT: [[CMP6:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] -// CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !29 +// CHECK5-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] +// CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 -// CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK5-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK5-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK5-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !29 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4, !llvm.access.group !29 -// CHECK5-NEXT: [[CONV7:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 -// CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !29 -// CHECK5-NEXT: [[MUL8:%.*]] = mul i64 [[TMP11]], [[TMP12]] -// CHECK5-NEXT: [[ADD:%.*]] = add i64 [[CONV7]], [[MUL8]] -// CHECK5-NEXT: [[CONV9:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK5-NEXT: store i32 [[CONV9]], i32* [[LIN4]], align 4, !llvm.access.group !29 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START3]], align 4, !llvm.access.group !29 -// CHECK5-NEXT: [[CONV10:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK5-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 -// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !29 -// CHECK5-NEXT: [[MUL11:%.*]] = mul i64 [[TMP14]], [[TMP15]] -// CHECK5-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] -// CHECK5-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 -// CHECK5-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4, !llvm.access.group !29 -// CHECK5-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !29 -// CHECK5-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK5-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 -// CHECK5-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK5-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2, !llvm.access.group !29 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4, !llvm.access.group !29 +// CHECK5-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK5-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK5-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !29 +// CHECK5-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] +// CHECK5-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] +// CHECK5-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK5-NEXT: store i32 [[CONV6]], i32* [[LIN2]], align 4, !llvm.access.group !29 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4, !llvm.access.group !29 +// CHECK5-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK5-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK5-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !29 +// CHECK5-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] +// CHECK5-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] +// CHECK5-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 +// CHECK5-NEXT: store i32 [[CONV10]], i32* [[A3]], align 4, !llvm.access.group !29 +// CHECK5-NEXT: [[TMP23:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !29 +// CHECK5-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 +// CHECK5-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 +// CHECK5-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 +// CHECK5-NEXT: store i16 [[CONV13]], i16* [[AA]], align 2, !llvm.access.group !29 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 -// CHECK5-NEXT: [[ADD17:%.*]] = add i64 [[TMP17]], 1 -// CHECK5-NEXT: store i64 [[ADD17]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK5-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK5-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 +// CHECK5-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK5-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK5-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i64 400, i64* [[IT]], align 8 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK5-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK5-NEXT: br i1 [[TMP28]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK5: .omp.linear.pu: -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK5-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 4 -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK5-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 4 +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, i32* [[LIN2]], align 4 +// CHECK5-NEXT: store i32 [[TMP29]], i32* [[LIN]], align 4 +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, i32* [[A3]], align 4 +// CHECK5-NEXT: store i32 [[TMP30]], i32* [[A]], align 4 // CHECK5-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK5: .omp.linear.pu.done: // CHECK5-NEXT: ret void @@ -4031,31 +4124,29 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK5-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK5-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 -// CHECK5-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP3]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK5-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK5-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4065,65 +4156,69 @@ // CHECK5-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK5-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK5-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 3, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] -// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] -// CHECK5-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK5-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2, !llvm.access.group !32 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !32 -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK5-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !32 -// CHECK5-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !32 -// CHECK5-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 -// CHECK5-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK5-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2, !llvm.access.group !32 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK5-NEXT: store i16 [[CONV]], i16* [[IT]], align 2, !llvm.access.group !32 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !32 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !32 +// CHECK5-NEXT: [[TMP14:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !32 +// CHECK5-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 +// CHECK5-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 +// CHECK5-NEXT: store i16 [[CONV5]], i16* [[AA]], align 2, !llvm.access.group !32 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK5-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i16 22, i16* [[IT]], align 2 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4144,8 +4239,7 @@ // CHECK5-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 // CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK5-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 // CHECK5-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -4166,33 +4260,40 @@ // CHECK5-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK5-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK5-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 -// CHECK5-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 10, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, [10 x float]*, i64, float*, [5 x [10 x double]]*, i64, i64, double*, %struct.TT*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP9]], [10 x float]* [[TMP0]], i64 [[TMP1]], float* [[TMP2]], [5 x [10 x double]]* [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], double* [[TMP6]], %struct.TT* [[TMP7]], i64 [[TMP11]]) +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK5-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store [10 x float]* [[TMP0]], [10 x float]** [[TMP10]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store i64 [[TMP1]], i64* [[TMP11]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: store float* [[TMP2]], float** [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK5-NEXT: store [5 x [10 x double]]* [[TMP3]], [5 x [10 x double]]** [[TMP13]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK5-NEXT: store i64 [[TMP4]], i64* [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK5-NEXT: store i64 [[TMP5]], i64* [[TMP15]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK5-NEXT: store double* [[TMP6]], double** [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK5-NEXT: store %struct.TT* [[TMP7]], %struct.TT** [[TMP17]], align 8 +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 10 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV5]], align 4 +// CHECK5-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8 -// CHECK5-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8 -// CHECK5-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4202,127 +4303,131 @@ // CHECK5-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK5-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK5-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8 -// CHECK5-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK5-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8 -// CHECK5-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8 -// CHECK5-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK5-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK5-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP4:%.*]] = load [10 x float]*, [10 x float]** [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP10:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 6 +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP11]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 7 +// CHECK5-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP13]], align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 8 +// CHECK5-NEXT: [[TMP16:%.*]] = load double*, double** [[TMP15]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 9 +// CHECK5-NEXT: [[TMP18:%.*]] = load %struct.TT*, %struct.TT** [[TMP17]], align 8 +// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 10 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 8 +// CHECK5-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 -// CHECK5-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK5-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK5-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] -// CHECK5-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 -// CHECK5-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1, !llvm.access.group !35 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !35 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK5-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4, !llvm.access.group !35 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK5-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !35 -// CHECK5-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double -// CHECK5-NEXT: [[ADD10:%.*]] = fadd double [[CONV9]], 1.000000e+00 -// CHECK5-NEXT: [[CONV11:%.*]] = fptrunc double [[ADD10]] to float -// CHECK5-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4, !llvm.access.group !35 -// CHECK5-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK5-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4, !llvm.access.group !35 -// CHECK5-NEXT: [[CONV13:%.*]] = fpext float [[TMP21]] to double -// CHECK5-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.000000e+00 -// CHECK5-NEXT: [[CONV15:%.*]] = fptrunc double [[ADD14]] to float -// CHECK5-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4, !llvm.access.group !35 -// CHECK5-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 -// CHECK5-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX16]], i64 0, i64 2 -// CHECK5-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !35 -// CHECK5-NEXT: [[ADD18:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK5-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !35 -// CHECK5-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK5-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP23]] -// CHECK5-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX19]], i64 3 -// CHECK5-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8, !llvm.access.group !35 -// CHECK5-NEXT: [[ADD21:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK5-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8, !llvm.access.group !35 -// CHECK5-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !35 -// CHECK5-NEXT: [[ADD22:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK5-NEXT: store i64 [[ADD22]], i64* [[X]], align 8, !llvm.access.group !35 -// CHECK5-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !35 -// CHECK5-NEXT: [[CONV23:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK5-NEXT: [[ADD24:%.*]] = add nsw i32 [[CONV23]], 1 -// CHECK5-NEXT: [[CONV25:%.*]] = trunc i32 [[ADD24]] to i8 -// CHECK5-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8, !llvm.access.group !35 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 +// CHECK5-NEXT: store i8 [[CONV]], i8* [[IT]], align 1, !llvm.access.group !35 +// CHECK5-NEXT: [[TMP32:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !35 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[A]], align 4, !llvm.access.group !35 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i64 0, i64 2 +// CHECK5-NEXT: [[TMP33:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !35 +// CHECK5-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK5-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK5-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK5-NEXT: store float [[CONV5]], float* [[ARRAYIDX]], align 4, !llvm.access.group !35 +// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 3 +// CHECK5-NEXT: [[TMP34:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !llvm.access.group !35 +// CHECK5-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double +// CHECK5-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 +// CHECK5-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float +// CHECK5-NEXT: store float [[CONV9]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !35 +// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP10]], i64 0, i64 1 +// CHECK5-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX10]], i64 0, i64 2 +// CHECK5-NEXT: [[TMP35:%.*]] = load double, double* [[ARRAYIDX11]], align 8, !llvm.access.group !35 +// CHECK5-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK5-NEXT: store double [[ADD12]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !35 +// CHECK5-NEXT: [[TMP36:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK5-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP16]], i64 [[TMP36]] +// CHECK5-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX13]], i64 3 +// CHECK5-NEXT: [[TMP37:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !35 +// CHECK5-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK5-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8, !llvm.access.group !35 +// CHECK5-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP18]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP38:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !35 +// CHECK5-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK5-NEXT: store i64 [[ADD16]], i64* [[X]], align 8, !llvm.access.group !35 +// CHECK5-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP18]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP39:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !35 +// CHECK5-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK5-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK5-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK5-NEXT: store i8 [[CONV19]], i8* [[Y]], align 8, !llvm.access.group !35 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK5-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK5-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK5-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK5-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK5-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD27:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK5-NEXT: store i32 [[ADD27]], i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK5-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK5-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) -// CHECK5-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK5-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK5-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK5-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i8 96, i8* [[IT]], align 1 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4655,8 +4760,7 @@ // CHECK5-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 // CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK5-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) @@ -4672,27 +4776,33 @@ // CHECK5-NEXT: [[TMP3:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK5-NEXT: [[TMP4:%.*]] = load i16*, i16** [[C_ADDR]], align 8 // CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK5-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[CONV4]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 -// CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1 -// CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store i64 [[TMP2]], i64* [[TMP8]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: store i64 [[TMP3]], i64* [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK5-NEXT: store i16* [[TMP4]], i16** [[TMP10]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK5-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV3]], align 1 +// CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK5-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK5-NEXT: store i8 [[FROMBOOL]], i8* [[CONV5]], align 1 -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 1 -// CHECK5-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP9]] to i1 -// CHECK5-NEXT: br i1 [[TOBOOL6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK5-NEXT: store i8 [[FROMBOOL]], i8* [[TMP11]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = load i8, i8* [[CONV3]], align 1 +// CHECK5-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK5-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK5: omp_if.then: -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*, i64)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]], i64 [[TMP6]], i64 [[TMP2]], i64 [[TMP3]], i16* [[TMP4]], i64 [[TMP8]]) +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_IF_END:%.*]] // CHECK5: omp_if.else: // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK5-NEXT: call void @.omp_outlined..10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP6]], i64 [[TMP2]], i64 [[TMP3]], i16* [[TMP4]], i64 [[TMP8]]) #[[ATTR4]] +// CHECK5-NEXT: call void @.omp_outlined..10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK5-NEXT: br label [[OMP_IF_END]] // CHECK5: omp_if.end: @@ -4700,16 +4810,13 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 +// CHECK5-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -4719,138 +4826,144 @@ // CHECK5-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK5-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK5-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK5-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP10:%.*]] = load i16*, i16** [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 6 +// CHECK5-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP11]], align 8 +// CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 +// CHECK5-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK5-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK5-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK5-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK5-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV3]], align 1 -// CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 -// CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK5-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK5: omp_if.then: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP7]], 3 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK5-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP16]], 3 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK5-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK5-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_IV]], align 8 +// CHECK5-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP18]], i64* [[DOTOMP_IV]], align 8 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !38 -// CHECK5-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !38 -// CHECK5-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP10]], [[TMP11]] -// CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !38 +// CHECK5-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !38 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp ule i64 [[TMP19]], [[TMP20]] +// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !38 -// CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP12]], 400 +// CHECK5-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !38 +// CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP21]], 400 // CHECK5-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK5-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !38 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !38 -// CHECK5-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP13]] to double -// CHECK5-NEXT: [[ADD:%.*]] = fadd double [[CONV5]], 1.500000e+00 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[B]], align 4, !llvm.access.group !38 +// CHECK5-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP22]] to double +// CHECK5-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK5-NEXT: store double [[ADD]], double* [[A]], align 8, !nontemporal !39, !llvm.access.group !38 -// CHECK5-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP14:%.*]] = load double, double* [[A6]], align 8, !nontemporal !39, !llvm.access.group !38 -// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK5-NEXT: store double [[INC]], double* [[A6]], align 8, !nontemporal !39, !llvm.access.group !38 -// CHECK5-NEXT: [[CONV7:%.*]] = fptosi double [[INC]] to i16 -// CHECK5-NEXT: [[TMP15:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP15]] -// CHECK5-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 -// CHECK5-NEXT: store i16 [[CONV7]], i16* [[ARRAYIDX8]], align 2, !llvm.access.group !38 +// CHECK5-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP23:%.*]] = load double, double* [[A3]], align 8, !nontemporal !39, !llvm.access.group !38 +// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// CHECK5-NEXT: store double [[INC]], double* [[A3]], align 8, !nontemporal !39, !llvm.access.group !38 +// CHECK5-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// CHECK5-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i64 [[TMP24]] +// CHECK5-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// CHECK5-NEXT: store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2, !llvm.access.group !38 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !38 -// CHECK5-NEXT: [[ADD9:%.*]] = add i64 [[TMP16]], 1 -// CHECK5-NEXT: store i64 [[ADD9]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !38 +// CHECK5-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !38 +// CHECK5-NEXT: [[ADD6:%.*]] = add i64 [[TMP25]], 1 +// CHECK5-NEXT: store i64 [[ADD6]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !38 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_IF_END:%.*]] // CHECK5: omp_if.else: -// CHECK5-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK5-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK5-NEXT: [[CMP10:%.*]] = icmp ugt i64 [[TMP19]], 3 -// CHECK5-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] -// CHECK5: cond.true11: -// CHECK5-NEXT: br label [[COND_END13:%.*]] -// CHECK5: cond.false12: -// CHECK5-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK5-NEXT: br label [[COND_END13]] -// CHECK5: cond.end13: -// CHECK5-NEXT: [[COND14:%.*]] = phi i64 [ 3, [[COND_TRUE11]] ], [ [[TMP20]], [[COND_FALSE12]] ] -// CHECK5-NEXT: store i64 [[COND14]], i64* [[DOTOMP_UB]], align 8 -// CHECK5-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK5-NEXT: store i64 [[TMP21]], i64* [[DOTOMP_IV]], align 8 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND15:%.*]] -// CHECK5: omp.inner.for.cond15: -// CHECK5-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK5-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK5-NEXT: [[CMP16:%.*]] = icmp ule i64 [[TMP22]], [[TMP23]] -// CHECK5-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY17:%.*]], label [[OMP_INNER_FOR_END31:%.*]] -// CHECK5: omp.inner.for.body17: -// CHECK5-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK5-NEXT: [[MUL18:%.*]] = mul i64 [[TMP24]], 400 -// CHECK5-NEXT: [[SUB19:%.*]] = sub i64 2000, [[MUL18]] -// CHECK5-NEXT: store i64 [[SUB19]], i64* [[IT]], align 8 -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK5-NEXT: [[CONV20:%.*]] = sitofp i32 [[TMP25]] to double -// CHECK5-NEXT: [[ADD21:%.*]] = fadd double [[CONV20]], 1.500000e+00 -// CHECK5-NEXT: [[A22:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: store double [[ADD21]], double* [[A22]], align 8 -// CHECK5-NEXT: [[A23:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP26:%.*]] = load double, double* [[A23]], align 8 -// CHECK5-NEXT: [[INC24:%.*]] = fadd double [[TMP26]], 1.000000e+00 -// CHECK5-NEXT: store double [[INC24]], double* [[A23]], align 8 -// CHECK5-NEXT: [[CONV25:%.*]] = fptosi double [[INC24]] to i16 -// CHECK5-NEXT: [[TMP27:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK5-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP27]] -// CHECK5-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX26]], i64 1 -// CHECK5-NEXT: store i16 [[CONV25]], i16* [[ARRAYIDX27]], align 2 -// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE28:%.*]] -// CHECK5: omp.body.continue28: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC29:%.*]] -// CHECK5: omp.inner.for.inc29: -// CHECK5-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK5-NEXT: [[ADD30:%.*]] = add i64 [[TMP28]], 1 -// CHECK5-NEXT: store i64 [[ADD30]], i64* [[DOTOMP_IV]], align 8 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND15]], !llvm.loop [[LOOP42:![0-9]+]] -// CHECK5: omp.inner.for.end31: +// CHECK5-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK5-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK5-NEXT: [[CMP7:%.*]] = icmp ugt i64 [[TMP28]], 3 +// CHECK5-NEXT: br i1 [[CMP7]], label [[COND_TRUE8:%.*]], label [[COND_FALSE9:%.*]] +// CHECK5: cond.true8: +// CHECK5-NEXT: br label [[COND_END10:%.*]] +// CHECK5: cond.false9: +// CHECK5-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK5-NEXT: br label [[COND_END10]] +// CHECK5: cond.end10: +// CHECK5-NEXT: [[COND11:%.*]] = phi i64 [ 3, [[COND_TRUE8]] ], [ [[TMP29]], [[COND_FALSE9]] ] +// CHECK5-NEXT: store i64 [[COND11]], i64* [[DOTOMP_UB]], align 8 +// CHECK5-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP30]], i64* [[DOTOMP_IV]], align 8 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND12:%.*]] +// CHECK5: omp.inner.for.cond12: +// CHECK5-NEXT: [[TMP31:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK5-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK5-NEXT: [[CMP13:%.*]] = icmp ule i64 [[TMP31]], [[TMP32]] +// CHECK5-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY14:%.*]], label [[OMP_INNER_FOR_END28:%.*]] +// CHECK5: omp.inner.for.body14: +// CHECK5-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK5-NEXT: [[MUL15:%.*]] = mul i64 [[TMP33]], 400 +// CHECK5-NEXT: [[SUB16:%.*]] = sub i64 2000, [[MUL15]] +// CHECK5-NEXT: store i64 [[SUB16]], i64* [[IT]], align 8 +// CHECK5-NEXT: [[TMP34:%.*]] = load i32, i32* [[B]], align 4 +// CHECK5-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP34]] to double +// CHECK5-NEXT: [[ADD18:%.*]] = fadd double [[CONV17]], 1.500000e+00 +// CHECK5-NEXT: [[A19:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK5-NEXT: store double [[ADD18]], double* [[A19]], align 8 +// CHECK5-NEXT: [[A20:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP35:%.*]] = load double, double* [[A20]], align 8 +// CHECK5-NEXT: [[INC21:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK5-NEXT: store double [[INC21]], double* [[A20]], align 8 +// CHECK5-NEXT: [[CONV22:%.*]] = fptosi double [[INC21]] to i16 +// CHECK5-NEXT: [[TMP36:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK5-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i64 [[TMP36]] +// CHECK5-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX23]], i64 1 +// CHECK5-NEXT: store i16 [[CONV22]], i16* [[ARRAYIDX24]], align 2 +// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE25:%.*]] +// CHECK5: omp.body.continue25: +// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC26:%.*]] +// CHECK5: omp.inner.for.inc26: +// CHECK5-NEXT: [[TMP37:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK5-NEXT: [[ADD27:%.*]] = add i64 [[TMP37]], 1 +// CHECK5-NEXT: store i64 [[ADD27]], i64* [[DOTOMP_IV]], align 8 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND12]], !llvm.loop [[LOOP42:![0-9]+]] +// CHECK5: omp.inner.for.end28: // CHECK5-NEXT: br label [[OMP_IF_END]] // CHECK5: omp_if.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK5-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK5-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) +// CHECK5-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK5-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i64 400, i64* [[IT]], align 8 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4865,9 +4978,7 @@ // CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK5-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK5-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 @@ -4876,43 +4987,47 @@ // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK5-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK5-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK5-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 -// CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* -// CHECK5-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK5-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV2]], align 1 +// CHECK5-NEXT: store i8 [[TMP6]], i8* [[TMP5]], align 2 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP7]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK5-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 -// CHECK5-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* -// CHECK5-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK5-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 2 +// CHECK5-NEXT: store i8 [[TMP6]], i8* [[AAA]], align 1 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP7]], align 8 // CHECK5-NEXT: ret void // // @@ -4922,34 +5037,33 @@ // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK5-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK5-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK5-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK5-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK5-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP5]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -4959,70 +5073,74 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK5-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK5-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK5-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK5-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK5-NEXT: store i64 6, i64* [[DOTOMP_UB]], align 8 // CHECK5-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK5-NEXT: store i64 [[TMP5]], i64* [[DOTOMP_IV]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_IV]], align 8 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !44 -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !44 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] -// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !44 +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !44 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !44 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK5-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !44 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK5-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !44 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !44 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !44 -// CHECK5-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !44 -// CHECK5-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 -// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 -// CHECK5-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK5-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !44 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 -// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK5-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !44 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK5-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !44 +// CHECK5-NEXT: [[TMP16:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !44 +// CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK5-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK5-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2, !llvm.access.group !44 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 2 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 +// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK5-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !44 -// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i64 [[TMP12]], 1 -// CHECK5-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !44 +// CHECK5-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !44 +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 +// CHECK5-NEXT: store i64 [[ADD6]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !44 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK5-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i64 11, i64* [[I]], align 8 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5055,7 +5173,7 @@ // CHECK7-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 // CHECK7-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 -// CHECK7-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK7-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK7-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[LIN:%.*]] = alloca i32, align 4 @@ -5312,15 +5430,17 @@ // CHECK7-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96 // CHECK7-SAME: () #[[ATTR2:[0-9]+]] { // CHECK7-NEXT: entry: -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5330,52 +5450,54 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 5, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK7-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK7-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK7-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 33, i32* [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5391,7 +5513,7 @@ // CHECK7-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 4 // CHECK7-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 4 // CHECK7-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 4 -// CHECK7-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 4 // CHECK7-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 4 // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 @@ -5402,7 +5524,7 @@ // CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK7-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK7-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK7-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* // CHECK7-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) // CHECK7-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) @@ -5413,8 +5535,8 @@ // CHECK7-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !26 // CHECK7-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !26 // CHECK7-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !26 -// CHECK7-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !26 -// CHECK7-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !26 +// CHECK7-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 4, !noalias !26 +// CHECK7-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 4, !noalias !26 // CHECK7-NEXT: [[TMP11:%.*]] = call i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 1, i32 0, i32 0, i8* null, i32 0, i8* null) #[[ATTR4]] // CHECK7-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK7-NEXT: br i1 [[TMP12]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] @@ -5430,24 +5552,26 @@ // CHECK7-NEXT: entry: // CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[K_ADDR:%.*]] = alloca i64*, align 4 -// CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK7-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK7-NEXT: store i64* [[K]], i64** [[K_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load i64*, i64** [[K_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i64*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32 [[TMP2]], i64* [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store i64* [[TMP0]], i64** [[TMP3]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i64* noundef nonnull align 4 dereferenceable(8) [[K:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[K_ADDR:%.*]] = alloca i64*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK7-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTLINEAR_START:%.*]] = alloca i64, align 8 @@ -5456,76 +5580,80 @@ // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[K1:%.*]] = alloca i64, align 8 +// CHECK7-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: store i64* [[K]], i64** [[K_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load i64*, i64** [[K_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 8 -// CHECK7-NEXT: store i64 [[TMP1]], i64* [[DOTLINEAR_START]], align 8 +// CHECK7-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK7-NEXT: store i64 [[TMP5]], i64* [[DOTLINEAR_START]], align 8 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 8, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK7-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP3]]) -// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 1073741859, i32 0, i32 8, i32 1, i32 1) +// CHECK7-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK7-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP7]]) +// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32 1073741859, i32 0, i32 8, i32 1, i32 1) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP4]], 0 +// CHECK7-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK7-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK7-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] // CHECK7-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !27 -// CHECK7-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !27 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK7-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP10]], 3 -// CHECK7-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] to i64 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP9]], [[CONV]] -// CHECK7-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !27 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !27 -// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK7-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !27 +// CHECK7-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !27 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK7-NEXT: [[MUL1:%.*]] = mul nsw i32 [[TMP14]], 3 +// CHECK7-NEXT: [[CONV:%.*]] = sext i32 [[MUL1]] to i64 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP13]], [[CONV]] +// CHECK7-NEXT: store i64 [[ADD]], i64* [[K]], align 8, !llvm.access.group !27 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !27 +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK7-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !27 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK7-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK7-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: // CHECK7-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK7: omp.dispatch.end: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK7-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK7-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 1, i32* [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK7: .omp.final.done: -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK7-NEXT: br i1 [[TMP16]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK7-NEXT: br i1 [[TMP20]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK7: .omp.linear.pu: -// CHECK7-NEXT: [[TMP17:%.*]] = load i64, i64* [[K1]], align 8 -// CHECK7-NEXT: store i64 [[TMP17]], i64* [[TMP0]], align 8 +// CHECK7-NEXT: [[TMP21:%.*]] = load i64, i64* [[K]], align 8 +// CHECK7-NEXT: store i64 [[TMP21]], i64* [[TMP4]], align 8 // CHECK7-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK7: .omp.linear.pu.done: // CHECK7-NEXT: ret void @@ -5537,35 +5665,33 @@ // CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[LIN_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK7-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK7-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK7-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK7-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK7-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK7-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK7-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[LIN_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], i32* [[LIN_CASTED]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[LIN_CASTED]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], i32* [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP1]], i32 [[TMP3]], i32 [[TMP5]]) +// CHECK7-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK7-NEXT: store i16 [[TMP1]], i16* [[TMP0]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[LIN_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[LIN:%.*]], i32 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK7-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK7-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK7-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK7-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -5580,96 +5706,103 @@ // CHECK7-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK7-NEXT: [[TMP0:%.*]] = load i32, i32* [[LIN_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START1]], align 4 +// CHECK7-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 4 +// CHECK7-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], i32* [[LIN]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK7-NEXT: store i32 [[TMP6]], i32* [[A]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[LIN]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTLINEAR_START]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTLINEAR_START1]], align 4 // CHECK7-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() // CHECK7-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 // CHECK7-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK7-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK7-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK7-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) -// CHECK7-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK7-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK7-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]]) +// CHECK7-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK7-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK7-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK7-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK7-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK7-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK7-NEXT: store i64 [[TMP6]], i64* [[DOTOMP_IV]], align 8 +// CHECK7-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK7-NEXT: store i64 [[TMP13]], i64* [[DOTOMP_IV]], align 8 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 -// CHECK7-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !30 -// CHECK7-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] +// CHECK7-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 +// CHECK7-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !30 +// CHECK7-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] // CHECK7-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 -// CHECK7-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK7-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 +// CHECK7-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK7-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK7-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !30 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4, !llvm.access.group !30 -// CHECK7-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK7-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 -// CHECK7-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !30 -// CHECK7-NEXT: [[MUL6:%.*]] = mul i64 [[TMP11]], [[TMP12]] -// CHECK7-NEXT: [[ADD:%.*]] = add i64 [[CONV5]], [[MUL6]] -// CHECK7-NEXT: [[CONV7:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK7-NEXT: store i32 [[CONV7]], i32* [[LIN2]], align 4, !llvm.access.group !30 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4, !llvm.access.group !30 -// CHECK7-NEXT: [[CONV8:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK7-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 -// CHECK7-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !30 -// CHECK7-NEXT: [[MUL9:%.*]] = mul i64 [[TMP14]], [[TMP15]] -// CHECK7-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] -// CHECK7-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 -// CHECK7-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4, !llvm.access.group !30 -// CHECK7-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !30 -// CHECK7-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK7-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 -// CHECK7-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK7-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !30 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4, !llvm.access.group !30 +// CHECK7-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK7-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 +// CHECK7-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !30 +// CHECK7-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] +// CHECK7-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] +// CHECK7-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK7-NEXT: store i32 [[CONV6]], i32* [[LIN2]], align 4, !llvm.access.group !30 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4, !llvm.access.group !30 +// CHECK7-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK7-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 +// CHECK7-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !30 +// CHECK7-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] +// CHECK7-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] +// CHECK7-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 +// CHECK7-NEXT: store i32 [[CONV10]], i32* [[A3]], align 4, !llvm.access.group !30 +// CHECK7-NEXT: [[TMP23:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !30 +// CHECK7-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 +// CHECK7-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 +// CHECK7-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 +// CHECK7-NEXT: store i16 [[CONV13]], i16* [[AA]], align 2, !llvm.access.group !30 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 -// CHECK7-NEXT: [[ADD15:%.*]] = add i64 [[TMP17]], 1 -// CHECK7-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 +// CHECK7-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 +// CHECK7-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 +// CHECK7-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK7-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK7-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i64 400, i64* [[IT]], align 8 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK7: .omp.final.done: -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK7-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK7-NEXT: br i1 [[TMP28]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK7: .omp.linear.pu: -// CHECK7-NEXT: [[TMP22:%.*]] = load i32, i32* [[LIN2]], align 4 -// CHECK7-NEXT: store i32 [[TMP22]], i32* [[LIN_ADDR]], align 4 -// CHECK7-NEXT: [[TMP23:%.*]] = load i32, i32* [[A3]], align 4 -// CHECK7-NEXT: store i32 [[TMP23]], i32* [[A_ADDR]], align 4 +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, i32* [[LIN2]], align 4 +// CHECK7-NEXT: store i32 [[TMP29]], i32* [[LIN]], align 4 +// CHECK7-NEXT: [[TMP30:%.*]] = load i32, i32* [[A3]], align 4 +// CHECK7-NEXT: store i32 [[TMP30]], i32* [[A]], align 4 // CHECK7-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK7: .omp.linear.pu.done: // CHECK7-NEXT: ret void @@ -5680,29 +5813,28 @@ // CHECK7-NEXT: entry: // CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK7-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK7-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK7-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK7-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK7-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32 [[TMP1]], i32 [[TMP3]]) +// CHECK7-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK7-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK7-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5712,64 +5844,69 @@ // CHECK7-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK7-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK7-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK7-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 3, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] -// CHECK7-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK7-NEXT: store i16 [[CONV2]], i16* [[IT]], align 2, !llvm.access.group !33 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !33 -// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK7-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !33 -// CHECK7-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !33 -// CHECK7-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 -// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 -// CHECK7-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK7-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2, !llvm.access.group !33 +// CHECK7-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK7-NEXT: store i16 [[CONV]], i16* [[IT]], align 2, !llvm.access.group !33 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !33 +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK7-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !33 +// CHECK7-NEXT: [[TMP14:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !33 +// CHECK7-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 +// CHECK7-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 +// CHECK7-NEXT: store i16 [[CONV5]], i16* [[AA]], align 2, !llvm.access.group !33 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK7-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK7-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK7-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK7-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK7-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i16 22, i16* [[IT]], align 2 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5790,8 +5927,7 @@ // CHECK7-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 // CHECK7-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 // CHECK7-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK7-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK7-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 // CHECK7-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -5810,31 +5946,40 @@ // CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 // CHECK7-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 // CHECK7-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], i32* [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 10, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [10 x float]*, i32, float*, [5 x [10 x double]]*, i32, i32, double*, %struct.TT*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP9]], [10 x float]* [[TMP0]], i32 [[TMP1]], float* [[TMP2]], [5 x [10 x double]]* [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], double* [[TMP6]], %struct.TT* [[TMP7]], i32 [[TMP11]]) +// CHECK7-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store [10 x float]* [[TMP0]], [10 x float]** [[TMP10]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store i32 [[TMP1]], i32* [[TMP11]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: store float* [[TMP2]], float** [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK7-NEXT: store [5 x [10 x double]]* [[TMP3]], [5 x [10 x double]]** [[TMP13]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK7-NEXT: store i32 [[TMP4]], i32* [[TMP14]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK7-NEXT: store i32 [[TMP5]], i32* [[TMP15]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK7-NEXT: store double* [[TMP6]], double** [[TMP16]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK7-NEXT: store %struct.TT* [[TMP7]], %struct.TT** [[TMP17]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4 -// CHECK7-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4 -// CHECK7-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4 -// CHECK7-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 -// CHECK7-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 -// CHECK7-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK7-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5844,125 +5989,131 @@ // CHECK7-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK7-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4 -// CHECK7-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK7-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4 -// CHECK7-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4 -// CHECK7-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load [10 x float]*, [10 x float]** [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 6 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 7 +// CHECK7-NEXT: [[TMP16:%.*]] = load double*, double** [[TMP15]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 8 +// CHECK7-NEXT: [[TMP18:%.*]] = load %struct.TT*, %struct.TT** [[TMP17]], align 4 +// CHECK7-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 9 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK7-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK7-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK7-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !36 -// CHECK7-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK7-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK7-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK7-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK7-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK7-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 // CHECK7-NEXT: store i8 [[CONV]], i8* [[IT]], align 1, !llvm.access.group !36 -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !36 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK7-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4, !llvm.access.group !36 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !36 -// CHECK7-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double +// CHECK7-NEXT: [[TMP32:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[A]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP33:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK7-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK7-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK7-NEXT: store float [[CONV5]], float* [[ARRAYIDX]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP8]], i32 3 +// CHECK7-NEXT: [[TMP34:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double // CHECK7-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK7-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK7-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !36 -// CHECK7-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK7-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !36 -// CHECK7-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK7-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK7-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK7-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !36 -// CHECK7-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 -// CHECK7-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !36 -// CHECK7-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK7-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !36 -// CHECK7-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK7-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP23]] -// CHECK7-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i32 3 -// CHECK7-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !36 -// CHECK7-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK7-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !36 -// CHECK7-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !36 -// CHECK7-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK7-NEXT: store i64 [[ADD20]], i64* [[X]], align 4, !llvm.access.group !36 -// CHECK7-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK7-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !36 -// CHECK7-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK7-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK7-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK7-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: store float [[CONV9]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP10]], i32 0, i32 1 +// CHECK7-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX10]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP35:%.*]] = load double, double* [[ARRAYIDX11]], align 8, !llvm.access.group !36 +// CHECK7-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK7-NEXT: store double [[ADD12]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !36 +// CHECK7-NEXT: [[TMP36:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK7-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP16]], i32 [[TMP36]] +// CHECK7-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX13]], i32 3 +// CHECK7-NEXT: [[TMP37:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !36 +// CHECK7-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK7-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8, !llvm.access.group !36 +// CHECK7-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP18]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP38:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK7-NEXT: store i64 [[ADD16]], i64* [[X]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP18]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP39:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK7-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK7-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK7-NEXT: store i8 [[CONV19]], i8* [[Y]], align 4, !llvm.access.group !36 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK7-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK7-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK7-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: -// CHECK7-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK7-NEXT: store i32 [[ADD25]], i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK7-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK7-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK7-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK7: omp.dispatch.end: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) -// CHECK7-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK7-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK7-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK7-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i8 96, i8* [[IT]], align 1 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6292,8 +6443,7 @@ // CHECK7-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 // CHECK7-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK7-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) @@ -6308,26 +6458,33 @@ // CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 // CHECK7-NEXT: [[TMP4:%.*]] = load i16*, i16** [[C_ADDR]], align 4 // CHECK7-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], i32* [[B_CASTED]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK7-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1 -// CHECK7-NEXT: [[CONV3:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[TMP8]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: store i32 [[TMP3]], i32* [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK7-NEXT: store i16* [[TMP4]], i16** [[TMP10]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK7-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK7-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK7-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK7-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP9]] to i1 -// CHECK7-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK7-NEXT: store i8 [[FROMBOOL]], i8* [[TMP11]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK7-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK7-NEXT: br i1 [[TOBOOL3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK7: omp_if.then: -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*, i32)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]], i32 [[TMP6]], i32 [[TMP2]], i32 [[TMP3]], i16* [[TMP4]], i32 [[TMP8]]) +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_IF_END:%.*]] // CHECK7: omp_if.else: // CHECK7-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK7-NEXT: call void @.omp_outlined..10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP6]], i32 [[TMP2]], i32 [[TMP3]], i16* [[TMP4]], i32 [[TMP8]]) #[[ATTR4]] +// CHECK7-NEXT: call void @.omp_outlined..10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4]] // CHECK7-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK7-NEXT: br label [[OMP_IF_END]] // CHECK7: omp_if.end: @@ -6335,16 +6492,13 @@ // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 -// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 -// CHECK7-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 +// CHECK7-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK7-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -6354,137 +6508,144 @@ // CHECK7-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK7-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 -// CHECK7-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK7-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i16*, i16** [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP11]], align 4 +// CHECK7-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 +// CHECK7-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK7-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK7-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK7-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK7-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK7-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 -// CHECK7-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK7-NEXT: [[TMP13:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK7-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK7-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK7: omp_if.then: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK7-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK7-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP7]], 3 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK7-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK7-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP16]], 3 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK7-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK7-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK7-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK7-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_IV]], align 8 +// CHECK7-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK7-NEXT: store i64 [[TMP18]], i64* [[DOTOMP_IV]], align 8 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !39 -// CHECK7-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !39 -// CHECK7-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP10]], [[TMP11]] -// CHECK7-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK7-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !39 +// CHECK7-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !39 +// CHECK7-NEXT: [[CMP2:%.*]] = icmp ule i64 [[TMP19]], [[TMP20]] +// CHECK7-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !39 -// CHECK7-NEXT: [[MUL:%.*]] = mul i64 [[TMP12]], 400 +// CHECK7-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !39 +// CHECK7-NEXT: [[MUL:%.*]] = mul i64 [[TMP21]], 400 // CHECK7-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK7-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !39 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[B_ADDR]], align 4, !llvm.access.group !39 -// CHECK7-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP13]] to double -// CHECK7-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, i32* [[B]], align 4, !llvm.access.group !39 +// CHECK7-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP22]] to double +// CHECK7-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK7-NEXT: store double [[ADD]], double* [[A]], align 4, !nontemporal !40, !llvm.access.group !39 -// CHECK7-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP14:%.*]] = load double, double* [[A5]], align 4, !nontemporal !40, !llvm.access.group !39 -// CHECK7-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK7-NEXT: store double [[INC]], double* [[A5]], align 4, !nontemporal !40, !llvm.access.group !39 -// CHECK7-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 -// CHECK7-NEXT: [[TMP15:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP15]] -// CHECK7-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 -// CHECK7-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2, !llvm.access.group !39 +// CHECK7-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP23:%.*]] = load double, double* [[A3]], align 4, !nontemporal !40, !llvm.access.group !39 +// CHECK7-NEXT: [[INC:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// CHECK7-NEXT: store double [[INC]], double* [[A3]], align 4, !nontemporal !40, !llvm.access.group !39 +// CHECK7-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// CHECK7-NEXT: [[TMP24:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i32 [[TMP24]] +// CHECK7-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// CHECK7-NEXT: store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2, !llvm.access.group !39 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !39 -// CHECK7-NEXT: [[ADD8:%.*]] = add i64 [[TMP16]], 1 -// CHECK7-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !39 +// CHECK7-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !39 +// CHECK7-NEXT: [[ADD6:%.*]] = add i64 [[TMP25]], 1 +// CHECK7-NEXT: store i64 [[ADD6]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !39 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_IF_END:%.*]] // CHECK7: omp_if.else: -// CHECK7-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK7-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK7-NEXT: [[CMP9:%.*]] = icmp ugt i64 [[TMP19]], 3 -// CHECK7-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] -// CHECK7: cond.true10: -// CHECK7-NEXT: br label [[COND_END12:%.*]] -// CHECK7: cond.false11: -// CHECK7-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK7-NEXT: br label [[COND_END12]] -// CHECK7: cond.end12: -// CHECK7-NEXT: [[COND13:%.*]] = phi i64 [ 3, [[COND_TRUE10]] ], [ [[TMP20]], [[COND_FALSE11]] ] -// CHECK7-NEXT: store i64 [[COND13]], i64* [[DOTOMP_UB]], align 8 -// CHECK7-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK7-NEXT: store i64 [[TMP21]], i64* [[DOTOMP_IV]], align 8 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND14:%.*]] -// CHECK7: omp.inner.for.cond14: -// CHECK7-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK7-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK7-NEXT: [[CMP15:%.*]] = icmp ule i64 [[TMP22]], [[TMP23]] -// CHECK7-NEXT: br i1 [[CMP15]], label [[OMP_INNER_FOR_BODY16:%.*]], label [[OMP_INNER_FOR_END30:%.*]] -// CHECK7: omp.inner.for.body16: -// CHECK7-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK7-NEXT: [[MUL17:%.*]] = mul i64 [[TMP24]], 400 -// CHECK7-NEXT: [[SUB18:%.*]] = sub i64 2000, [[MUL17]] -// CHECK7-NEXT: store i64 [[SUB18]], i64* [[IT]], align 8 -// CHECK7-NEXT: [[TMP25:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK7-NEXT: [[CONV19:%.*]] = sitofp i32 [[TMP25]] to double -// CHECK7-NEXT: [[ADD20:%.*]] = fadd double [[CONV19]], 1.500000e+00 -// CHECK7-NEXT: [[A21:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: store double [[ADD20]], double* [[A21]], align 4 -// CHECK7-NEXT: [[A22:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP26:%.*]] = load double, double* [[A22]], align 4 -// CHECK7-NEXT: [[INC23:%.*]] = fadd double [[TMP26]], 1.000000e+00 -// CHECK7-NEXT: store double [[INC23]], double* [[A22]], align 4 -// CHECK7-NEXT: [[CONV24:%.*]] = fptosi double [[INC23]] to i16 -// CHECK7-NEXT: [[TMP27:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK7-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP27]] -// CHECK7-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX25]], i32 1 -// CHECK7-NEXT: store i16 [[CONV24]], i16* [[ARRAYIDX26]], align 2 -// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]] -// CHECK7: omp.body.continue27: -// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] -// CHECK7: omp.inner.for.inc28: -// CHECK7-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK7-NEXT: [[ADD29:%.*]] = add i64 [[TMP28]], 1 -// CHECK7-NEXT: store i64 [[ADD29]], i64* [[DOTOMP_IV]], align 8 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND14]], !llvm.loop [[LOOP43:![0-9]+]] -// CHECK7: omp.inner.for.end30: +// CHECK7-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK7-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK7-NEXT: [[CMP7:%.*]] = icmp ugt i64 [[TMP28]], 3 +// CHECK7-NEXT: br i1 [[CMP7]], label [[COND_TRUE8:%.*]], label [[COND_FALSE9:%.*]] +// CHECK7: cond.true8: +// CHECK7-NEXT: br label [[COND_END10:%.*]] +// CHECK7: cond.false9: +// CHECK7-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK7-NEXT: br label [[COND_END10]] +// CHECK7: cond.end10: +// CHECK7-NEXT: [[COND11:%.*]] = phi i64 [ 3, [[COND_TRUE8]] ], [ [[TMP29]], [[COND_FALSE9]] ] +// CHECK7-NEXT: store i64 [[COND11]], i64* [[DOTOMP_UB]], align 8 +// CHECK7-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK7-NEXT: store i64 [[TMP30]], i64* [[DOTOMP_IV]], align 8 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND12:%.*]] +// CHECK7: omp.inner.for.cond12: +// CHECK7-NEXT: [[TMP31:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK7-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK7-NEXT: [[CMP13:%.*]] = icmp ule i64 [[TMP31]], [[TMP32]] +// CHECK7-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY14:%.*]], label [[OMP_INNER_FOR_END28:%.*]] +// CHECK7: omp.inner.for.body14: +// CHECK7-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK7-NEXT: [[MUL15:%.*]] = mul i64 [[TMP33]], 400 +// CHECK7-NEXT: [[SUB16:%.*]] = sub i64 2000, [[MUL15]] +// CHECK7-NEXT: store i64 [[SUB16]], i64* [[IT]], align 8 +// CHECK7-NEXT: [[TMP34:%.*]] = load i32, i32* [[B]], align 4 +// CHECK7-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP34]] to double +// CHECK7-NEXT: [[ADD18:%.*]] = fadd double [[CONV17]], 1.500000e+00 +// CHECK7-NEXT: [[A19:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK7-NEXT: store double [[ADD18]], double* [[A19]], align 4 +// CHECK7-NEXT: [[A20:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP35:%.*]] = load double, double* [[A20]], align 4 +// CHECK7-NEXT: [[INC21:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK7-NEXT: store double [[INC21]], double* [[A20]], align 4 +// CHECK7-NEXT: [[CONV22:%.*]] = fptosi double [[INC21]] to i16 +// CHECK7-NEXT: [[TMP36:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK7-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i32 [[TMP36]] +// CHECK7-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX23]], i32 1 +// CHECK7-NEXT: store i16 [[CONV22]], i16* [[ARRAYIDX24]], align 2 +// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE25:%.*]] +// CHECK7: omp.body.continue25: +// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC26:%.*]] +// CHECK7: omp.inner.for.inc26: +// CHECK7-NEXT: [[TMP37:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK7-NEXT: [[ADD27:%.*]] = add i64 [[TMP37]], 1 +// CHECK7-NEXT: store i64 [[ADD27]], i64* [[DOTOMP_IV]], align 8 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND12]], !llvm.loop [[LOOP43:![0-9]+]] +// CHECK7: omp.inner.for.end28: // CHECK7-NEXT: br label [[OMP_IF_END]] // CHECK7: omp_if.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK7-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK7-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) +// CHECK7-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK7-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i64 400, i64* [[IT]], align 8 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6499,9 +6660,7 @@ // CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK7-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK7-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 @@ -6509,41 +6668,47 @@ // CHECK7-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK7-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* // CHECK7-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK7-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK7-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 -// CHECK7-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* -// CHECK7-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK7-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV1]], align 1 +// CHECK7-NEXT: store i8 [[TMP6]], i8* [[TMP5]], align 2 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP7]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 4 +// CHECK7-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK7-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 -// CHECK7-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK7-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK7-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* -// CHECK7-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK7-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 2 +// CHECK7-NEXT: store i8 [[TMP6]], i8* [[AAA]], align 1 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP7]], align 4 // CHECK7-NEXT: ret void // // @@ -6553,32 +6718,32 @@ // CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK7-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK7-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK7-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK7-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK7-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK7-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK7-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP5]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 4 +// CHECK7-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK7-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -6588,69 +6753,74 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK7-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK7-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK7-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK7-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK7-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK7-NEXT: store i64 6, i64* [[DOTOMP_UB]], align 8 // CHECK7-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK7-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK7-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK7-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK7-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK7-NEXT: store i64 [[TMP5]], i64* [[DOTOMP_IV]], align 8 +// CHECK7-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK7-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_IV]], align 8 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !45 -// CHECK7-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !45 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !45 +// CHECK7-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !45 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !45 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK7-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !45 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK7-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !45 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !45 -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK7-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !45 -// CHECK7-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !45 -// CHECK7-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 -// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 -// CHECK7-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK7-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !45 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 -// CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK7-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !45 +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK7-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !45 +// CHECK7-NEXT: [[TMP16:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !45 +// CHECK7-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK7-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK7-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2, !llvm.access.group !45 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 +// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK7-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !45 -// CHECK7-NEXT: [[ADD7:%.*]] = add nsw i64 [[TMP12]], 1 -// CHECK7-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !45 +// CHECK7-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !45 +// CHECK7-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 +// CHECK7-NEXT: store i64 [[ADD6]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !45 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK7-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK7-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i64 11, i64* [[I]], align 8 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -8672,15 +8842,17 @@ // CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96 // CHECK17-SAME: () #[[ATTR0:[0-9]+]] { // CHECK17-NEXT: entry: -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8690,52 +8862,54 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 5, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK17-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK17-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK17-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 33, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -8749,145 +8923,146 @@ // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[LIN_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: store i64 [[LIN]], i64* [[LIN_ADDR]], align 8 // CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK17-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 -// CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP3]], i64 [[TMP5]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK17-NEXT: store i16 [[TMP1]], i16* [[TMP0]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 +// CHECK17-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[LIN:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK17-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTLINEAR_START3:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTLINEAR_START1:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTLINEAR_STEP:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[IT:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[LIN4:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A5:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[LIN2:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[LIN]], i64* [[LIN_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* -// CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK17-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 +// CHECK17-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 4 +// CHECK17-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], i32* [[LIN]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK17-NEXT: store i32 [[TMP6]], i32* [[A]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[LIN]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTLINEAR_START]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTLINEAR_START1]], align 4 // CHECK17-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() #[[ATTR5:[0-9]+]] // CHECK17-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 // CHECK17-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK17-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK17-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK17-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP3]]) -// CHECK17-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP10]]) +// CHECK17-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK17-NEXT: store i64 [[TMP6]], i64* [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP13]], i64* [[DOTOMP_IV]], align 8 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !17 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] -// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !17 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 -// CHECK17-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 +// CHECK17-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK17-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK17-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !17 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4, !llvm.access.group !17 -// CHECK17-NEXT: [[CONV7:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 -// CHECK17-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !17 -// CHECK17-NEXT: [[MUL8:%.*]] = mul i64 [[TMP11]], [[TMP12]] -// CHECK17-NEXT: [[ADD:%.*]] = add i64 [[CONV7]], [[MUL8]] -// CHECK17-NEXT: [[CONV9:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK17-NEXT: store i32 [[CONV9]], i32* [[LIN4]], align 4, !llvm.access.group !17 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START3]], align 4, !llvm.access.group !17 -// CHECK17-NEXT: [[CONV10:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK17-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 -// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !17 -// CHECK17-NEXT: [[MUL11:%.*]] = mul i64 [[TMP14]], [[TMP15]] -// CHECK17-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] -// CHECK17-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 -// CHECK17-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4, !llvm.access.group !17 -// CHECK17-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !17 -// CHECK17-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK17-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 -// CHECK17-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK17-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2, !llvm.access.group !17 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4, !llvm.access.group !17 +// CHECK17-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 +// CHECK17-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !17 +// CHECK17-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] +// CHECK17-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK17-NEXT: store i32 [[CONV6]], i32* [[LIN2]], align 4, !llvm.access.group !17 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4, !llvm.access.group !17 +// CHECK17-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK17-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 +// CHECK17-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !17 +// CHECK17-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] +// CHECK17-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] +// CHECK17-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 +// CHECK17-NEXT: store i32 [[CONV10]], i32* [[A3]], align 4, !llvm.access.group !17 +// CHECK17-NEXT: [[TMP23:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !17 +// CHECK17-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 +// CHECK17-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 +// CHECK17-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 +// CHECK17-NEXT: store i16 [[CONV13]], i16* [[AA]], align 2, !llvm.access.group !17 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 -// CHECK17-NEXT: [[ADD17:%.*]] = add i64 [[TMP17]], 1 -// CHECK17-NEXT: store i64 [[ADD17]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 +// CHECK17-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 +// CHECK17-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 +// CHECK17-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK17-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK17-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i64 400, i64* [[IT]], align 8 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK17: .omp.final.done: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK17-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK17-NEXT: br i1 [[TMP28]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK17: .omp.linear.pu: -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK17-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK17-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 4 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[LIN2]], align 4 +// CHECK17-NEXT: store i32 [[TMP29]], i32* [[LIN]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[A3]], align 4 +// CHECK17-NEXT: store i32 [[TMP30]], i32* [[A]], align 4 // CHECK17-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK17: .omp.linear.pu.done: // CHECK17-NEXT: ret void @@ -8904,31 +9079,29 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK17-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP3]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK17-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK17-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8938,65 +9111,69 @@ // CHECK17-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK17-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK17-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 3, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] -// CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK17-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2, !llvm.access.group !20 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !20 -// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK17-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !20 -// CHECK17-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !20 -// CHECK17-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 -// CHECK17-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK17-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2, !llvm.access.group !20 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK17-NEXT: store i16 [[CONV]], i16* [[IT]], align 2, !llvm.access.group !20 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !20 +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK17-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !20 +// CHECK17-NEXT: [[TMP14:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !20 +// CHECK17-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 +// CHECK17-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 +// CHECK17-NEXT: store i16 [[CONV5]], i16* [[AA]], align 2, !llvm.access.group !20 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK17-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK17-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i16 22, i16* [[IT]], align 2 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9017,8 +9194,7 @@ // CHECK17-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 // CHECK17-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK17-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -9039,33 +9215,40 @@ // CHECK17-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK17-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 -// CHECK17-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 10, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, [10 x float]*, i64, float*, [5 x [10 x double]]*, i64, i64, double*, %struct.TT*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], [10 x float]* [[TMP0]], i64 [[TMP1]], float* [[TMP2]], [5 x [10 x double]]* [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], double* [[TMP6]], %struct.TT* [[TMP7]], i64 [[TMP11]]) +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store [10 x float]* [[TMP0]], [10 x float]** [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP1]], i64* [[TMP11]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store float* [[TMP2]], float** [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: store [5 x [10 x double]]* [[TMP3]], [5 x [10 x double]]** [[TMP13]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK17-NEXT: store i64 [[TMP5]], i64* [[TMP15]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK17-NEXT: store double* [[TMP6]], double** [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK17-NEXT: store %struct.TT* [[TMP7]], %struct.TT** [[TMP17]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 10 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV5]], align 4 +// CHECK17-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8 -// CHECK17-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 -// CHECK17-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9075,127 +9258,131 @@ // CHECK17-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK17-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8 -// CHECK17-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8 -// CHECK17-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8 -// CHECK17-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK17-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP4:%.*]] = load [10 x float]*, [10 x float]** [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP10:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP11]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 7 +// CHECK17-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP13]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 8 +// CHECK17-NEXT: [[TMP16:%.*]] = load double*, double** [[TMP15]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 9 +// CHECK17-NEXT: [[TMP18:%.*]] = load %struct.TT*, %struct.TT** [[TMP17]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 10 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 8 +// CHECK17-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 -// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] -// CHECK17-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 -// CHECK17-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1, !llvm.access.group !23 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !23 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4, !llvm.access.group !23 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !23 -// CHECK17-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double -// CHECK17-NEXT: [[ADD10:%.*]] = fadd double [[CONV9]], 1.000000e+00 -// CHECK17-NEXT: [[CONV11:%.*]] = fptrunc double [[ADD10]] to float -// CHECK17-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4, !llvm.access.group !23 -// CHECK17-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK17-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4, !llvm.access.group !23 -// CHECK17-NEXT: [[CONV13:%.*]] = fpext float [[TMP21]] to double -// CHECK17-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.000000e+00 -// CHECK17-NEXT: [[CONV15:%.*]] = fptrunc double [[ADD14]] to float -// CHECK17-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4, !llvm.access.group !23 -// CHECK17-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 -// CHECK17-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX16]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !23 -// CHECK17-NEXT: [[ADD18:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK17-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !23 -// CHECK17-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK17-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP23]] -// CHECK17-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX19]], i64 3 -// CHECK17-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8, !llvm.access.group !23 -// CHECK17-NEXT: [[ADD21:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK17-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8, !llvm.access.group !23 -// CHECK17-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !23 -// CHECK17-NEXT: [[ADD22:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK17-NEXT: store i64 [[ADD22]], i64* [[X]], align 8, !llvm.access.group !23 -// CHECK17-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !23 -// CHECK17-NEXT: [[CONV23:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK17-NEXT: [[ADD24:%.*]] = add nsw i32 [[CONV23]], 1 -// CHECK17-NEXT: [[CONV25:%.*]] = trunc i32 [[ADD24]] to i8 -// CHECK17-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8, !llvm.access.group !23 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 +// CHECK17-NEXT: store i8 [[CONV]], i8* [[IT]], align 1, !llvm.access.group !23 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[A]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i64 0, i64 2 +// CHECK17-NEXT: [[TMP33:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK17-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK17-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK17-NEXT: store float [[CONV5]], float* [[ARRAYIDX]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 3 +// CHECK17-NEXT: [[TMP34:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double +// CHECK17-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 +// CHECK17-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float +// CHECK17-NEXT: store float [[CONV9]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP10]], i64 0, i64 1 +// CHECK17-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX10]], i64 0, i64 2 +// CHECK17-NEXT: [[TMP35:%.*]] = load double, double* [[ARRAYIDX11]], align 8, !llvm.access.group !23 +// CHECK17-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK17-NEXT: store double [[ADD12]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !23 +// CHECK17-NEXT: [[TMP36:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK17-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP16]], i64 [[TMP36]] +// CHECK17-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX13]], i64 3 +// CHECK17-NEXT: [[TMP37:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !23 +// CHECK17-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK17-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8, !llvm.access.group !23 +// CHECK17-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP18]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP38:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !23 +// CHECK17-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK17-NEXT: store i64 [[ADD16]], i64* [[X]], align 8, !llvm.access.group !23 +// CHECK17-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP18]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP39:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !23 +// CHECK17-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK17-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK17-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK17-NEXT: store i8 [[CONV19]], i8* [[Y]], align 8, !llvm.access.group !23 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK17-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK17-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK17-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: -// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD27:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK17-NEXT: store i32 [[ADD27]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK17-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK17-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK17-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) -// CHECK17-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK17-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK17-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK17-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i8 96, i8* [[IT]], align 1 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9210,9 +9397,7 @@ // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 @@ -9221,43 +9406,47 @@ // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK17-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 -// CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* -// CHECK17-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK17-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV2]], align 1 +// CHECK17-NEXT: store i8 [[TMP6]], i8* [[TMP5]], align 2 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP7]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK17-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK17-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 2 +// CHECK17-NEXT: store i8 [[TMP6]], i8* [[AAA]], align 1 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP7]], align 8 // CHECK17-NEXT: ret void // // @@ -9269,7 +9458,7 @@ // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 -// CHECK17-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK17-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK17-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -9280,24 +9469,28 @@ // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK17-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]]) +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store %struct.S1* [[TMP0]], %struct.S1** [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK17-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i64 [[TMP1]], i64* [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP2]], i64* [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i16* [[TMP3]], i16** [[TMP9]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 +// CHECK17-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -9307,76 +9500,79 @@ // CHECK17-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK17-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i16*, i16** [[TMP9]], align 8 // CHECK17-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK17-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK17-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP6]], 3 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP13]], 3 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK17-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK17-NEXT: store i64 [[TMP8]], i64* [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP15]], i64* [[DOTOMP_IV]], align 8 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !26 -// CHECK17-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !26 -// CHECK17-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] -// CHECK17-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !26 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !26 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]] +// CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !26 -// CHECK17-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !26 +// CHECK17-NEXT: [[MUL:%.*]] = mul i64 [[TMP18]], 400 // CHECK17-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK17-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !26 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !26 -// CHECK17-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double -// CHECK17-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 -// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[B]], align 4, !llvm.access.group !26 +// CHECK17-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double +// CHECK17-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK17-NEXT: store double [[ADD]], double* [[A]], align 8, !llvm.access.group !26 -// CHECK17-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP13:%.*]] = load double, double* [[A5]], align 8, !llvm.access.group !26 -// CHECK17-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK17-NEXT: store double [[INC]], double* [[A5]], align 8, !llvm.access.group !26 -// CHECK17-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 -// CHECK17-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP14]] -// CHECK17-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 -// CHECK17-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2, !llvm.access.group !26 +// CHECK17-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP20:%.*]] = load double, double* [[A2]], align 8, !llvm.access.group !26 +// CHECK17-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK17-NEXT: store double [[INC]], double* [[A2]], align 8, !llvm.access.group !26 +// CHECK17-NEXT: [[CONV3:%.*]] = fptosi double [[INC]] to i16 +// CHECK17-NEXT: [[TMP21:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i64 [[TMP21]] +// CHECK17-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// CHECK17-NEXT: store i16 [[CONV3]], i16* [[ARRAYIDX4]], align 2, !llvm.access.group !26 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !26 -// CHECK17-NEXT: [[ADD8:%.*]] = add i64 [[TMP15]], 1 -// CHECK17-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !26 +// CHECK17-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !26 +// CHECK17-NEXT: [[ADD5:%.*]] = add i64 [[TMP22]], 1 +// CHECK17-NEXT: store i64 [[ADD5]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !26 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK17-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK17-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i64 400, i64* [[IT]], align 8 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9390,34 +9586,33 @@ // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK17-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK17-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP5]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -9427,70 +9622,74 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK17-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK17-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK17-NEXT: store i64 6, i64* [[DOTOMP_UB]], align 8 // CHECK17-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK17-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK17-NEXT: store i64 [[TMP5]], i64* [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_IV]], align 8 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !29 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] -// CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !29 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK17-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK17-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !29 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !29 -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK17-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !29 -// CHECK17-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !29 -// CHECK17-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 -// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 -// CHECK17-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK17-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !29 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !29 -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK17-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP16:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !29 +// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK17-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK17-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2, !llvm.access.group !29 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 2 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !29 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i64 [[TMP12]], 1 -// CHECK17-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 +// CHECK17-NEXT: store i64 [[ADD6]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK17-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK17-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i64 11, i64* [[I]], align 8 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9501,15 +9700,17 @@ // CHECK19-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96 // CHECK19-SAME: () #[[ATTR0:[0-9]+]] { // CHECK19-NEXT: entry: -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9519,52 +9720,54 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 5, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK19-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK19-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK19-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 33, i32* [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9578,35 +9781,33 @@ // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[LIN_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK19-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK19-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[LIN_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[LIN_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[LIN_CASTED]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP1]], i32 [[TMP3]], i32 [[TMP5]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK19-NEXT: store i16 [[TMP1]], i16* [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[LIN_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[LIN:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK19-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK19-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK19-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -9621,96 +9822,103 @@ // CHECK19-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[LIN_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START1]], align 4 +// CHECK19-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 4 +// CHECK19-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[LIN]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], i32* [[A]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[LIN]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTLINEAR_START]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTLINEAR_START1]], align 4 // CHECK19-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() #[[ATTR5:[0-9]+]] // CHECK19-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 // CHECK19-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK19-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK19-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK19-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP3]]) -// CHECK19-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK19-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP10]]) +// CHECK19-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK19-NEXT: store i64 [[TMP6]], i64* [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK19-NEXT: store i64 [[TMP13]], i64* [[DOTOMP_IV]], align 8 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 -// CHECK19-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !18 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 +// CHECK19-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !18 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 -// CHECK19-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK19-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 +// CHECK19-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK19-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK19-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !18 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4, !llvm.access.group !18 -// CHECK19-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK19-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 -// CHECK19-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !18 -// CHECK19-NEXT: [[MUL6:%.*]] = mul i64 [[TMP11]], [[TMP12]] -// CHECK19-NEXT: [[ADD:%.*]] = add i64 [[CONV5]], [[MUL6]] -// CHECK19-NEXT: [[CONV7:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK19-NEXT: store i32 [[CONV7]], i32* [[LIN2]], align 4, !llvm.access.group !18 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4, !llvm.access.group !18 -// CHECK19-NEXT: [[CONV8:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK19-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 -// CHECK19-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !18 -// CHECK19-NEXT: [[MUL9:%.*]] = mul i64 [[TMP14]], [[TMP15]] -// CHECK19-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] -// CHECK19-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 -// CHECK19-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4, !llvm.access.group !18 -// CHECK19-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !18 -// CHECK19-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK19-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 -// CHECK19-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK19-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !18 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK19-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 +// CHECK19-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !18 +// CHECK19-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] +// CHECK19-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK19-NEXT: store i32 [[CONV6]], i32* [[LIN2]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK19-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 +// CHECK19-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !18 +// CHECK19-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] +// CHECK19-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] +// CHECK19-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 +// CHECK19-NEXT: store i32 [[CONV10]], i32* [[A3]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: [[TMP23:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !18 +// CHECK19-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 +// CHECK19-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 +// CHECK19-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 +// CHECK19-NEXT: store i16 [[CONV13]], i16* [[AA]], align 2, !llvm.access.group !18 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 -// CHECK19-NEXT: [[ADD15:%.*]] = add i64 [[TMP17]], 1 -// CHECK19-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 +// CHECK19-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 +// CHECK19-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 +// CHECK19-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK19-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK19-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i64 400, i64* [[IT]], align 8 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK19: .omp.final.done: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK19-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK19-NEXT: br i1 [[TMP28]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK19: .omp.linear.pu: -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[LIN2]], align 4 -// CHECK19-NEXT: store i32 [[TMP22]], i32* [[LIN_ADDR]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[A3]], align 4 -// CHECK19-NEXT: store i32 [[TMP23]], i32* [[A_ADDR]], align 4 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[LIN2]], align 4 +// CHECK19-NEXT: store i32 [[TMP29]], i32* [[LIN]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[A3]], align 4 +// CHECK19-NEXT: store i32 [[TMP30]], i32* [[A]], align 4 // CHECK19-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK19: .omp.linear.pu.done: // CHECK19-NEXT: ret void @@ -9727,29 +9935,28 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK19-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32 [[TMP1]], i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK19-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9759,64 +9966,69 @@ // CHECK19-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK19-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK19-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 3, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] -// CHECK19-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK19-NEXT: store i16 [[CONV2]], i16* [[IT]], align 2, !llvm.access.group !21 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !21 -// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK19-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !21 -// CHECK19-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !21 -// CHECK19-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 -// CHECK19-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK19-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2, !llvm.access.group !21 +// CHECK19-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK19-NEXT: store i16 [[CONV]], i16* [[IT]], align 2, !llvm.access.group !21 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !21 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK19-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !21 +// CHECK19-NEXT: [[TMP14:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !21 +// CHECK19-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 +// CHECK19-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 +// CHECK19-NEXT: store i16 [[CONV5]], i16* [[AA]], align 2, !llvm.access.group !21 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK19-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK19-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK19-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK19-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i16 22, i16* [[IT]], align 2 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9837,8 +10049,7 @@ // CHECK19-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 // CHECK19-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -9857,31 +10068,40 @@ // CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 // CHECK19-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 // CHECK19-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 10, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [10 x float]*, i32, float*, [5 x [10 x double]]*, i32, i32, double*, %struct.TT*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP9]], [10 x float]* [[TMP0]], i32 [[TMP1]], float* [[TMP2]], [5 x [10 x double]]* [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], double* [[TMP6]], %struct.TT* [[TMP7]], i32 [[TMP11]]) +// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store [10 x float]* [[TMP0]], [10 x float]** [[TMP10]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32 [[TMP1]], i32* [[TMP11]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store float* [[TMP2]], float** [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store [5 x [10 x double]]* [[TMP3]], [5 x [10 x double]]** [[TMP13]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[TMP14]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK19-NEXT: store i32 [[TMP5]], i32* [[TMP15]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK19-NEXT: store double* [[TMP6]], double** [[TMP16]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK19-NEXT: store %struct.TT* [[TMP7]], %struct.TT** [[TMP17]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4 -// CHECK19-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 -// CHECK19-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9891,125 +10111,131 @@ // CHECK19-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4 -// CHECK19-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4 -// CHECK19-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4 -// CHECK19-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load [10 x float]*, [10 x float]** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 6 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 7 +// CHECK19-NEXT: [[TMP16:%.*]] = load double*, double** [[TMP15]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 8 +// CHECK19-NEXT: [[TMP18:%.*]] = load %struct.TT*, %struct.TT** [[TMP17]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 9 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 +// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK19-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 // CHECK19-NEXT: store i8 [[CONV]], i8* [[IT]], align 1, !llvm.access.group !24 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !24 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK19-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4, !llvm.access.group !24 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !24 -// CHECK19-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !24 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[A]], align 4, !llvm.access.group !24 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP33:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !24 +// CHECK19-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK19-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK19-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK19-NEXT: store float [[CONV5]], float* [[ARRAYIDX]], align 4, !llvm.access.group !24 +// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP8]], i32 3 +// CHECK19-NEXT: [[TMP34:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !llvm.access.group !24 +// CHECK19-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double // CHECK19-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK19-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK19-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !24 -// CHECK19-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK19-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !24 -// CHECK19-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK19-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK19-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK19-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !24 -// CHECK19-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 -// CHECK19-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !24 -// CHECK19-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK19-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !24 -// CHECK19-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK19-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP23]] -// CHECK19-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i32 3 -// CHECK19-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !24 -// CHECK19-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK19-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !24 -// CHECK19-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !24 -// CHECK19-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK19-NEXT: store i64 [[ADD20]], i64* [[X]], align 4, !llvm.access.group !24 -// CHECK19-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK19-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !24 -// CHECK19-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK19-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK19-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK19-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4, !llvm.access.group !24 +// CHECK19-NEXT: store float [[CONV9]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !24 +// CHECK19-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP10]], i32 0, i32 1 +// CHECK19-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX10]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP35:%.*]] = load double, double* [[ARRAYIDX11]], align 8, !llvm.access.group !24 +// CHECK19-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK19-NEXT: store double [[ADD12]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !24 +// CHECK19-NEXT: [[TMP36:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK19-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP16]], i32 [[TMP36]] +// CHECK19-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX13]], i32 3 +// CHECK19-NEXT: [[TMP37:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !24 +// CHECK19-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK19-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8, !llvm.access.group !24 +// CHECK19-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP18]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP38:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !24 +// CHECK19-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK19-NEXT: store i64 [[ADD16]], i64* [[X]], align 4, !llvm.access.group !24 +// CHECK19-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP18]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP39:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !24 +// CHECK19-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK19-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK19-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK19-NEXT: store i8 [[CONV19]], i8* [[Y]], align 4, !llvm.access.group !24 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK19-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK19-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK19-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK19-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK19-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: -// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK19-NEXT: store i32 [[ADD25]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK19-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK19-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK19-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) -// CHECK19-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK19-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK19-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK19-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i8 96, i8* [[IT]], align 1 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10024,9 +10250,7 @@ // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK19-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 @@ -10034,41 +10258,47 @@ // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* // CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK19-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK19-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 -// CHECK19-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* -// CHECK19-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK19-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV1]], align 1 +// CHECK19-NEXT: store i8 [[TMP6]], i8* [[TMP5]], align 2 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP7]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK19-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK19-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 2 +// CHECK19-NEXT: store i8 [[TMP6]], i8* [[AAA]], align 1 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP7]], align 4 // CHECK19-NEXT: ret void // // @@ -10080,7 +10310,7 @@ // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 -// CHECK19-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK19-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK19-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -10090,23 +10320,28 @@ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 // CHECK19-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]]) +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store %struct.S1* [[TMP0]], %struct.S1** [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32 [[TMP1]], i32* [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store i16* [[TMP3]], i16** [[TMP9]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK19-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK19-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -10116,75 +10351,79 @@ // CHECK19-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i16*, i16** [[TMP9]], align 4 // CHECK19-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK19-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK19-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK19-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP6]], 3 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP13]], 3 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK19-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK19-NEXT: store i64 [[TMP8]], i64* [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK19-NEXT: store i64 [[TMP15]], i64* [[DOTOMP_IV]], align 8 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !27 -// CHECK19-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !27 -// CHECK19-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] -// CHECK19-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !27 +// CHECK19-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !27 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]] +// CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !27 -// CHECK19-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 +// CHECK19-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !27 +// CHECK19-NEXT: [[MUL:%.*]] = mul i64 [[TMP18]], 400 // CHECK19-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK19-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !27 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[B_ADDR]], align 4, !llvm.access.group !27 -// CHECK19-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[B]], align 4, !llvm.access.group !27 +// CHECK19-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double // CHECK19-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK19-NEXT: store double [[ADD]], double* [[A]], align 4, !llvm.access.group !27 -// CHECK19-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP13:%.*]] = load double, double* [[A4]], align 4, !llvm.access.group !27 -// CHECK19-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK19-NEXT: store double [[INC]], double* [[A4]], align 4, !llvm.access.group !27 -// CHECK19-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK19-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP14]] -// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 -// CHECK19-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2, !llvm.access.group !27 +// CHECK19-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP20:%.*]] = load double, double* [[A2]], align 4, !llvm.access.group !27 +// CHECK19-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK19-NEXT: store double [[INC]], double* [[A2]], align 4, !llvm.access.group !27 +// CHECK19-NEXT: [[CONV3:%.*]] = fptosi double [[INC]] to i16 +// CHECK19-NEXT: [[TMP21:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i32 [[TMP21]] +// CHECK19-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// CHECK19-NEXT: store i16 [[CONV3]], i16* [[ARRAYIDX4]], align 2, !llvm.access.group !27 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !27 -// CHECK19-NEXT: [[ADD7:%.*]] = add i64 [[TMP15]], 1 -// CHECK19-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !27 +// CHECK19-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !27 +// CHECK19-NEXT: [[ADD5:%.*]] = add i64 [[TMP22]], 1 +// CHECK19-NEXT: store i64 [[ADD5]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !27 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK19-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i64 400, i64* [[IT]], align 8 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10198,32 +10437,32 @@ // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK19-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK19-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK19-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP5]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK19-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -10233,69 +10472,74 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK19-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK19-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK19-NEXT: store i64 6, i64* [[DOTOMP_UB]], align 8 // CHECK19-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK19-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK19-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK19-NEXT: store i64 [[TMP5]], i64* [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK19-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_IV]], align 8 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 -// CHECK19-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !30 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 +// CHECK19-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !30 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK19-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK19-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !30 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !30 -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK19-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !30 -// CHECK19-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !30 -// CHECK19-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 -// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 -// CHECK19-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK19-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !30 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !30 -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK19-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !30 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !30 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK19-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !30 +// CHECK19-NEXT: [[TMP16:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !30 +// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK19-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK19-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2, !llvm.access.group !30 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !30 +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !30 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 -// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i64 [[TMP12]], 1 -// CHECK19-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 +// CHECK19-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 +// CHECK19-NEXT: store i64 [[ADD6]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK19-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK19-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i64 11, i64* [[I]], align 8 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10306,15 +10550,17 @@ // CHECK21-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96 // CHECK21-SAME: () #[[ATTR0:[0-9]+]] { // CHECK21-NEXT: entry: -// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10324,52 +10570,54 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK21-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 5, i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK21-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK21-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK21-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK21-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK21-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 33, i32* [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10383,145 +10631,146 @@ // CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[LIN_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK21-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK21-NEXT: store i64 [[LIN]], i64* [[LIN_ADDR]], align 8 // CHECK21-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK21-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK21-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK21-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK21-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK21-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 -// CHECK21-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK21-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* -// CHECK21-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 -// CHECK21-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 -// CHECK21-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK21-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 -// CHECK21-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP3]], i64 [[TMP5]]) +// CHECK21-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK21-NEXT: store i16 [[TMP1]], i16* [[TMP0]], align 4 +// CHECK21-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK21-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK21-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 +// CHECK21-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[LIN:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK21-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK21-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 -// CHECK21-NEXT: [[DOTLINEAR_START3:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[DOTLINEAR_START1:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTLINEAR_STEP:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[IT:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[LIN4:%.*]] = alloca i32, align 4 -// CHECK21-NEXT: [[A5:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[LIN2:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[LIN]], i64* [[LIN_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK21-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* -// CHECK21-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK21-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK21-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 -// CHECK21-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 +// CHECK21-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 4 +// CHECK21-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK21-NEXT: store i32 [[TMP4]], i32* [[LIN]], align 4 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK21-NEXT: store i32 [[TMP6]], i32* [[A]], align 4 +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[LIN]], align 4 +// CHECK21-NEXT: store i32 [[TMP7]], i32* [[DOTLINEAR_START]], align 4 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK21-NEXT: store i32 [[TMP8]], i32* [[DOTLINEAR_START1]], align 4 // CHECK21-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() #[[ATTR5:[0-9]+]] // CHECK21-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 // CHECK21-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK21-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK21-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK21-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP3]]) -// CHECK21-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK21-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK21-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK21-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK21-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP10]]) +// CHECK21-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK21-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK21-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK21-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK21-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK21-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK21-NEXT: store i64 [[TMP6]], i64* [[DOTOMP_IV]], align 8 +// CHECK21-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK21-NEXT: store i64 [[TMP13]], i64* [[DOTOMP_IV]], align 8 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 -// CHECK21-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !17 -// CHECK21-NEXT: [[CMP6:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] -// CHECK21-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK21-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 +// CHECK21-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !17 +// CHECK21-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] +// CHECK21-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 -// CHECK21-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK21-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 +// CHECK21-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK21-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK21-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !17 -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4, !llvm.access.group !17 -// CHECK21-NEXT: [[CONV7:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK21-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 -// CHECK21-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !17 -// CHECK21-NEXT: [[MUL8:%.*]] = mul i64 [[TMP11]], [[TMP12]] -// CHECK21-NEXT: [[ADD:%.*]] = add i64 [[CONV7]], [[MUL8]] -// CHECK21-NEXT: [[CONV9:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK21-NEXT: store i32 [[CONV9]], i32* [[LIN4]], align 4, !llvm.access.group !17 -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START3]], align 4, !llvm.access.group !17 -// CHECK21-NEXT: [[CONV10:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK21-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 -// CHECK21-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !17 -// CHECK21-NEXT: [[MUL11:%.*]] = mul i64 [[TMP14]], [[TMP15]] -// CHECK21-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] -// CHECK21-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 -// CHECK21-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4, !llvm.access.group !17 -// CHECK21-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !17 -// CHECK21-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK21-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 -// CHECK21-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK21-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2, !llvm.access.group !17 +// CHECK21-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4, !llvm.access.group !17 +// CHECK21-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK21-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 +// CHECK21-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !17 +// CHECK21-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] +// CHECK21-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] +// CHECK21-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK21-NEXT: store i32 [[CONV6]], i32* [[LIN2]], align 4, !llvm.access.group !17 +// CHECK21-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4, !llvm.access.group !17 +// CHECK21-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK21-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 +// CHECK21-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !17 +// CHECK21-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] +// CHECK21-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] +// CHECK21-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 +// CHECK21-NEXT: store i32 [[CONV10]], i32* [[A3]], align 4, !llvm.access.group !17 +// CHECK21-NEXT: [[TMP23:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !17 +// CHECK21-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 +// CHECK21-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 +// CHECK21-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 +// CHECK21-NEXT: store i16 [[CONV13]], i16* [[AA]], align 2, !llvm.access.group !17 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 -// CHECK21-NEXT: [[ADD17:%.*]] = add i64 [[TMP17]], 1 -// CHECK21-NEXT: store i64 [[ADD17]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 +// CHECK21-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 +// CHECK21-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 +// CHECK21-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK21-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK21-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK21-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK21-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i64 400, i64* [[IT]], align 8 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK21: .omp.final.done: -// CHECK21-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK21-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK21-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK21-NEXT: br i1 [[TMP28]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK21: .omp.linear.pu: -// CHECK21-NEXT: [[TMP22:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK21-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 4 -// CHECK21-NEXT: [[TMP23:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK21-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 4 +// CHECK21-NEXT: [[TMP29:%.*]] = load i32, i32* [[LIN2]], align 4 +// CHECK21-NEXT: store i32 [[TMP29]], i32* [[LIN]], align 4 +// CHECK21-NEXT: [[TMP30:%.*]] = load i32, i32* [[A3]], align 4 +// CHECK21-NEXT: store i32 [[TMP30]], i32* [[A]], align 4 // CHECK21-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK21: .omp.linear.pu.done: // CHECK21-NEXT: ret void @@ -10538,31 +10787,29 @@ // CHECK21-NEXT: entry: // CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK21-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK21-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK21-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK21-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK21-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK21-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 -// CHECK21-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK21-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK21-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 -// CHECK21-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP3]]) +// CHECK21-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK21-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK21-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK21-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK21-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10572,65 +10819,69 @@ // CHECK21-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK21-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK21-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK21-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK21-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK21-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 3, i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK21-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 -// CHECK21-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] -// CHECK21-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] -// CHECK21-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK21-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2, !llvm.access.group !20 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !20 -// CHECK21-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK21-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !20 -// CHECK21-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !20 -// CHECK21-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 -// CHECK21-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 -// CHECK21-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK21-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2, !llvm.access.group !20 +// CHECK21-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK21-NEXT: store i16 [[CONV]], i16* [[IT]], align 2, !llvm.access.group !20 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !20 +// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK21-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !20 +// CHECK21-NEXT: [[TMP14:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !20 +// CHECK21-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK21-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 +// CHECK21-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 +// CHECK21-NEXT: store i16 [[CONV5]], i16* [[AA]], align 2, !llvm.access.group !20 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK21-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK21-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK21-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK21-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK21-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK21-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK21-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i16 22, i16* [[IT]], align 2 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10651,8 +10902,7 @@ // CHECK21-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 // CHECK21-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 // CHECK21-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK21-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK21-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 // CHECK21-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -10673,33 +10923,40 @@ // CHECK21-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK21-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK21-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK21-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK21-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 -// CHECK21-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 -// CHECK21-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK21-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 -// CHECK21-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 10, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, [10 x float]*, i64, float*, [5 x [10 x double]]*, i64, i64, double*, %struct.TT*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], [10 x float]* [[TMP0]], i64 [[TMP1]], float* [[TMP2]], [5 x [10 x double]]* [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], double* [[TMP6]], %struct.TT* [[TMP7]], i64 [[TMP11]]) +// CHECK21-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK21-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 8 +// CHECK21-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK21-NEXT: store [10 x float]* [[TMP0]], [10 x float]** [[TMP10]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK21-NEXT: store i64 [[TMP1]], i64* [[TMP11]], align 8 +// CHECK21-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK21-NEXT: store float* [[TMP2]], float** [[TMP12]], align 8 +// CHECK21-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK21-NEXT: store [5 x [10 x double]]* [[TMP3]], [5 x [10 x double]]** [[TMP13]], align 8 +// CHECK21-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK21-NEXT: store i64 [[TMP4]], i64* [[TMP14]], align 8 +// CHECK21-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK21-NEXT: store i64 [[TMP5]], i64* [[TMP15]], align 8 +// CHECK21-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK21-NEXT: store double* [[TMP6]], double** [[TMP16]], align 8 +// CHECK21-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK21-NEXT: store %struct.TT* [[TMP7]], %struct.TT** [[TMP17]], align 8 +// CHECK21-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 10 +// CHECK21-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV5]], align 4 +// CHECK21-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 8 +// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8 -// CHECK21-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8 -// CHECK21-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8 -// CHECK21-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 -// CHECK21-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 -// CHECK21-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK21-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10709,127 +10966,131 @@ // CHECK21-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK21-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK21-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK21-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8 -// CHECK21-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK21-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8 -// CHECK21-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8 -// CHECK21-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK21-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8 -// CHECK21-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK21-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 -// CHECK21-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 -// CHECK21-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK21-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK21-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK21-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP4:%.*]] = load [10 x float]*, [10 x float]** [[TMP3]], align 8 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK21-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK21-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 8 +// CHECK21-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 5 +// CHECK21-NEXT: [[TMP10:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[TMP9]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 6 +// CHECK21-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP11]], align 8 +// CHECK21-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 7 +// CHECK21-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP13]], align 8 +// CHECK21-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 8 +// CHECK21-NEXT: [[TMP16:%.*]] = load double*, double** [[TMP15]], align 8 +// CHECK21-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 9 +// CHECK21-NEXT: [[TMP18:%.*]] = load %struct.TT*, %struct.TT** [[TMP17]], align 8 +// CHECK21-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 10 +// CHECK21-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 8 +// CHECK21-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 -// CHECK21-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK21-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK21-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK21-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK21: omp.dispatch.cond: -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK21-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK21-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK21-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK21: omp.dispatch.body: // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK21-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 -// CHECK21-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK21-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK21-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK21-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 +// CHECK21-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK21-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK21-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK21-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] -// CHECK21-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 -// CHECK21-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1, !llvm.access.group !23 -// CHECK21-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !23 -// CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK21-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4, !llvm.access.group !23 -// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK21-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !23 -// CHECK21-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double -// CHECK21-NEXT: [[ADD10:%.*]] = fadd double [[CONV9]], 1.000000e+00 -// CHECK21-NEXT: [[CONV11:%.*]] = fptrunc double [[ADD10]] to float -// CHECK21-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4, !llvm.access.group !23 -// CHECK21-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK21-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4, !llvm.access.group !23 -// CHECK21-NEXT: [[CONV13:%.*]] = fpext float [[TMP21]] to double -// CHECK21-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.000000e+00 -// CHECK21-NEXT: [[CONV15:%.*]] = fptrunc double [[ADD14]] to float -// CHECK21-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4, !llvm.access.group !23 -// CHECK21-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 -// CHECK21-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX16]], i64 0, i64 2 -// CHECK21-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !23 -// CHECK21-NEXT: [[ADD18:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK21-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !23 -// CHECK21-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK21-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP23]] -// CHECK21-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX19]], i64 3 -// CHECK21-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8, !llvm.access.group !23 -// CHECK21-NEXT: [[ADD21:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK21-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8, !llvm.access.group !23 -// CHECK21-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK21-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !23 -// CHECK21-NEXT: [[ADD22:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK21-NEXT: store i64 [[ADD22]], i64* [[X]], align 8, !llvm.access.group !23 -// CHECK21-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK21-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !23 -// CHECK21-NEXT: [[CONV23:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK21-NEXT: [[ADD24:%.*]] = add nsw i32 [[CONV23]], 1 -// CHECK21-NEXT: [[CONV25:%.*]] = trunc i32 [[ADD24]] to i8 -// CHECK21-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8, !llvm.access.group !23 +// CHECK21-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 +// CHECK21-NEXT: store i8 [[CONV]], i8* [[IT]], align 1, !llvm.access.group !23 +// CHECK21-NEXT: [[TMP32:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !23 +// CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK21-NEXT: store i32 [[ADD]], i32* [[A]], align 4, !llvm.access.group !23 +// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i64 0, i64 2 +// CHECK21-NEXT: [[TMP33:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !23 +// CHECK21-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK21-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK21-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK21-NEXT: store float [[CONV5]], float* [[ARRAYIDX]], align 4, !llvm.access.group !23 +// CHECK21-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 3 +// CHECK21-NEXT: [[TMP34:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !llvm.access.group !23 +// CHECK21-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double +// CHECK21-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 +// CHECK21-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float +// CHECK21-NEXT: store float [[CONV9]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !23 +// CHECK21-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP10]], i64 0, i64 1 +// CHECK21-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX10]], i64 0, i64 2 +// CHECK21-NEXT: [[TMP35:%.*]] = load double, double* [[ARRAYIDX11]], align 8, !llvm.access.group !23 +// CHECK21-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK21-NEXT: store double [[ADD12]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !23 +// CHECK21-NEXT: [[TMP36:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK21-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP16]], i64 [[TMP36]] +// CHECK21-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX13]], i64 3 +// CHECK21-NEXT: [[TMP37:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !23 +// CHECK21-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK21-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8, !llvm.access.group !23 +// CHECK21-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP18]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP38:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !23 +// CHECK21-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK21-NEXT: store i64 [[ADD16]], i64* [[X]], align 8, !llvm.access.group !23 +// CHECK21-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP18]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP39:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !23 +// CHECK21-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK21-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK21-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK21-NEXT: store i8 [[CONV19]], i8* [[Y]], align 8, !llvm.access.group !23 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK21-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK21-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK21-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK21-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK21-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK21: omp.dispatch.inc: -// CHECK21-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK21-NEXT: [[ADD27:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK21-NEXT: store i32 [[ADD27]], i32* [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK21-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK21-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK21-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK21-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK21-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK21-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK21: omp.dispatch.end: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) -// CHECK21-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK21-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK21-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK21-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i8 96, i8* [[IT]], align 1 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10844,9 +11105,7 @@ // CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK21-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK21-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK21-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK21-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 @@ -10855,43 +11114,47 @@ // CHECK21-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK21-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK21-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK21-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK21-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 -// CHECK21-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK21-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK21-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 -// CHECK21-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK21-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 -// CHECK21-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* -// CHECK21-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 -// CHECK21-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 -// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]]) +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK21-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK21-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV2]], align 1 +// CHECK21-NEXT: store i8 [[TMP6]], i8* [[TMP5]], align 2 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK21-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP7]], align 8 +// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK21-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK21-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 -// CHECK21-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK21-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK21-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* -// CHECK21-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK21-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK21-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK21-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 2 +// CHECK21-NEXT: store i8 [[TMP6]], i8* [[AAA]], align 1 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK21-NEXT: [[TMP8:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP7]], align 8 // CHECK21-NEXT: ret void // // @@ -10904,8 +11167,7 @@ // CHECK21-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 // CHECK21-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK21-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) @@ -10921,27 +11183,33 @@ // CHECK21-NEXT: [[TMP3:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK21-NEXT: [[TMP4:%.*]] = load i16*, i16** [[C_ADDR]], align 8 // CHECK21-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK21-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK21-NEXT: store i32 [[TMP5]], i32* [[CONV4]], align 4 -// CHECK21-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK21-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 -// CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1 -// CHECK21-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP5]], align 8 +// CHECK21-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK21-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 8 +// CHECK21-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK21-NEXT: store i64 [[TMP2]], i64* [[TMP8]], align 8 +// CHECK21-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK21-NEXT: store i64 [[TMP3]], i64* [[TMP9]], align 8 +// CHECK21-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK21-NEXT: store i16* [[TMP4]], i16** [[TMP10]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK21-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV3]], align 1 +// CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK21-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK21-NEXT: store i8 [[FROMBOOL]], i8* [[CONV5]], align 1 -// CHECK21-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK21-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 1 -// CHECK21-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP9]] to i1 -// CHECK21-NEXT: br i1 [[TOBOOL6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK21-NEXT: store i8 [[FROMBOOL]], i8* [[TMP11]], align 8 +// CHECK21-NEXT: [[TMP13:%.*]] = load i8, i8* [[CONV3]], align 1 +// CHECK21-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK21-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK21: omp_if.then: -// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]], i64 [[TMP6]], i64 [[TMP2]], i64 [[TMP3]], i16* [[TMP4]], i64 [[TMP8]]) +// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: br label [[OMP_IF_END:%.*]] // CHECK21: omp_if.else: // CHECK21-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK21-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK21-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK21-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP6]], i64 [[TMP2]], i64 [[TMP3]], i16* [[TMP4]], i64 [[TMP8]]) #[[ATTR2:[0-9]+]] +// CHECK21-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2:[0-9]+]] // CHECK21-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK21-NEXT: br label [[OMP_IF_END]] // CHECK21: omp_if.end: @@ -10949,16 +11217,13 @@ // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK21-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 -// CHECK21-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 -// CHECK21-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 +// CHECK21-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -10968,138 +11233,144 @@ // CHECK21-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK21-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK21-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK21-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK21-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK21-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK21-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 3 +// CHECK21-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 4 +// CHECK21-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK21-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 5 +// CHECK21-NEXT: [[TMP10:%.*]] = load i16*, i16** [[TMP9]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 6 +// CHECK21-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP11]], align 8 +// CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 +// CHECK21-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK21-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK21-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK21-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK21-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV3]], align 1 -// CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 -// CHECK21-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK21-NEXT: [[TMP13:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK21-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK21: omp_if.then: -// CHECK21-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK21-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK21-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP7]], 3 +// CHECK21-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK21-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK21-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP16]], 3 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK21-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK21-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK21-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK21-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_IV]], align 8 +// CHECK21-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK21-NEXT: store i64 [[TMP18]], i64* [[DOTOMP_IV]], align 8 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !26 -// CHECK21-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !26 -// CHECK21-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP10]], [[TMP11]] -// CHECK21-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK21-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !26 +// CHECK21-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !26 +// CHECK21-NEXT: [[CMP2:%.*]] = icmp ule i64 [[TMP19]], [[TMP20]] +// CHECK21-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !26 -// CHECK21-NEXT: [[MUL:%.*]] = mul i64 [[TMP12]], 400 +// CHECK21-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !26 +// CHECK21-NEXT: [[MUL:%.*]] = mul i64 [[TMP21]], 400 // CHECK21-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK21-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !26 -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !26 -// CHECK21-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP13]] to double -// CHECK21-NEXT: [[ADD:%.*]] = fadd double [[CONV5]], 1.500000e+00 -// CHECK21-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP22:%.*]] = load i32, i32* [[B]], align 4, !llvm.access.group !26 +// CHECK21-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP22]] to double +// CHECK21-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK21-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK21-NEXT: store double [[ADD]], double* [[A]], align 8, !nontemporal !27, !llvm.access.group !26 -// CHECK21-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK21-NEXT: [[TMP14:%.*]] = load double, double* [[A6]], align 8, !nontemporal !27, !llvm.access.group !26 -// CHECK21-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK21-NEXT: store double [[INC]], double* [[A6]], align 8, !nontemporal !27, !llvm.access.group !26 -// CHECK21-NEXT: [[CONV7:%.*]] = fptosi double [[INC]] to i16 -// CHECK21-NEXT: [[TMP15:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP15]] -// CHECK21-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 -// CHECK21-NEXT: store i16 [[CONV7]], i16* [[ARRAYIDX8]], align 2, !llvm.access.group !26 +// CHECK21-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP23:%.*]] = load double, double* [[A3]], align 8, !nontemporal !27, !llvm.access.group !26 +// CHECK21-NEXT: [[INC:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// CHECK21-NEXT: store double [[INC]], double* [[A3]], align 8, !nontemporal !27, !llvm.access.group !26 +// CHECK21-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// CHECK21-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i64 [[TMP24]] +// CHECK21-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// CHECK21-NEXT: store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2, !llvm.access.group !26 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !26 -// CHECK21-NEXT: [[ADD9:%.*]] = add i64 [[TMP16]], 1 -// CHECK21-NEXT: store i64 [[ADD9]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !26 +// CHECK21-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !26 +// CHECK21-NEXT: [[ADD6:%.*]] = add i64 [[TMP25]], 1 +// CHECK21-NEXT: store i64 [[ADD6]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !26 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_IF_END:%.*]] // CHECK21: omp_if.else: -// CHECK21-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK21-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK21-NEXT: [[CMP10:%.*]] = icmp ugt i64 [[TMP19]], 3 -// CHECK21-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] -// CHECK21: cond.true11: -// CHECK21-NEXT: br label [[COND_END13:%.*]] -// CHECK21: cond.false12: -// CHECK21-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK21-NEXT: br label [[COND_END13]] -// CHECK21: cond.end13: -// CHECK21-NEXT: [[COND14:%.*]] = phi i64 [ 3, [[COND_TRUE11]] ], [ [[TMP20]], [[COND_FALSE12]] ] -// CHECK21-NEXT: store i64 [[COND14]], i64* [[DOTOMP_UB]], align 8 -// CHECK21-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK21-NEXT: store i64 [[TMP21]], i64* [[DOTOMP_IV]], align 8 -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND15:%.*]] -// CHECK21: omp.inner.for.cond15: -// CHECK21-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK21-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK21-NEXT: [[CMP16:%.*]] = icmp ule i64 [[TMP22]], [[TMP23]] -// CHECK21-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY17:%.*]], label [[OMP_INNER_FOR_END31:%.*]] -// CHECK21: omp.inner.for.body17: -// CHECK21-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK21-NEXT: [[MUL18:%.*]] = mul i64 [[TMP24]], 400 -// CHECK21-NEXT: [[SUB19:%.*]] = sub i64 2000, [[MUL18]] -// CHECK21-NEXT: store i64 [[SUB19]], i64* [[IT]], align 8 -// CHECK21-NEXT: [[TMP25:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK21-NEXT: [[CONV20:%.*]] = sitofp i32 [[TMP25]] to double -// CHECK21-NEXT: [[ADD21:%.*]] = fadd double [[CONV20]], 1.500000e+00 -// CHECK21-NEXT: [[A22:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK21-NEXT: store double [[ADD21]], double* [[A22]], align 8 -// CHECK21-NEXT: [[A23:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK21-NEXT: [[TMP26:%.*]] = load double, double* [[A23]], align 8 -// CHECK21-NEXT: [[INC24:%.*]] = fadd double [[TMP26]], 1.000000e+00 -// CHECK21-NEXT: store double [[INC24]], double* [[A23]], align 8 -// CHECK21-NEXT: [[CONV25:%.*]] = fptosi double [[INC24]] to i16 -// CHECK21-NEXT: [[TMP27:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK21-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP27]] -// CHECK21-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX26]], i64 1 -// CHECK21-NEXT: store i16 [[CONV25]], i16* [[ARRAYIDX27]], align 2 -// CHECK21-NEXT: br label [[OMP_BODY_CONTINUE28:%.*]] -// CHECK21: omp.body.continue28: -// CHECK21-NEXT: br label [[OMP_INNER_FOR_INC29:%.*]] -// CHECK21: omp.inner.for.inc29: -// CHECK21-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK21-NEXT: [[ADD30:%.*]] = add i64 [[TMP28]], 1 -// CHECK21-NEXT: store i64 [[ADD30]], i64* [[DOTOMP_IV]], align 8 -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND15]], !llvm.loop [[LOOP30:![0-9]+]] -// CHECK21: omp.inner.for.end31: +// CHECK21-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK21-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK21-NEXT: [[CMP7:%.*]] = icmp ugt i64 [[TMP28]], 3 +// CHECK21-NEXT: br i1 [[CMP7]], label [[COND_TRUE8:%.*]], label [[COND_FALSE9:%.*]] +// CHECK21: cond.true8: +// CHECK21-NEXT: br label [[COND_END10:%.*]] +// CHECK21: cond.false9: +// CHECK21-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK21-NEXT: br label [[COND_END10]] +// CHECK21: cond.end10: +// CHECK21-NEXT: [[COND11:%.*]] = phi i64 [ 3, [[COND_TRUE8]] ], [ [[TMP29]], [[COND_FALSE9]] ] +// CHECK21-NEXT: store i64 [[COND11]], i64* [[DOTOMP_UB]], align 8 +// CHECK21-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK21-NEXT: store i64 [[TMP30]], i64* [[DOTOMP_IV]], align 8 +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND12:%.*]] +// CHECK21: omp.inner.for.cond12: +// CHECK21-NEXT: [[TMP31:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK21-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK21-NEXT: [[CMP13:%.*]] = icmp ule i64 [[TMP31]], [[TMP32]] +// CHECK21-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY14:%.*]], label [[OMP_INNER_FOR_END28:%.*]] +// CHECK21: omp.inner.for.body14: +// CHECK21-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK21-NEXT: [[MUL15:%.*]] = mul i64 [[TMP33]], 400 +// CHECK21-NEXT: [[SUB16:%.*]] = sub i64 2000, [[MUL15]] +// CHECK21-NEXT: store i64 [[SUB16]], i64* [[IT]], align 8 +// CHECK21-NEXT: [[TMP34:%.*]] = load i32, i32* [[B]], align 4 +// CHECK21-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP34]] to double +// CHECK21-NEXT: [[ADD18:%.*]] = fadd double [[CONV17]], 1.500000e+00 +// CHECK21-NEXT: [[A19:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK21-NEXT: store double [[ADD18]], double* [[A19]], align 8 +// CHECK21-NEXT: [[A20:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP35:%.*]] = load double, double* [[A20]], align 8 +// CHECK21-NEXT: [[INC21:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK21-NEXT: store double [[INC21]], double* [[A20]], align 8 +// CHECK21-NEXT: [[CONV22:%.*]] = fptosi double [[INC21]] to i16 +// CHECK21-NEXT: [[TMP36:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK21-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i64 [[TMP36]] +// CHECK21-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX23]], i64 1 +// CHECK21-NEXT: store i16 [[CONV22]], i16* [[ARRAYIDX24]], align 2 +// CHECK21-NEXT: br label [[OMP_BODY_CONTINUE25:%.*]] +// CHECK21: omp.body.continue25: +// CHECK21-NEXT: br label [[OMP_INNER_FOR_INC26:%.*]] +// CHECK21: omp.inner.for.inc26: +// CHECK21-NEXT: [[TMP37:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK21-NEXT: [[ADD27:%.*]] = add i64 [[TMP37]], 1 +// CHECK21-NEXT: store i64 [[ADD27]], i64* [[DOTOMP_IV]], align 8 +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND12]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK21: omp.inner.for.end28: // CHECK21-NEXT: br label [[OMP_IF_END]] // CHECK21: omp_if.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK21-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK21-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) +// CHECK21-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK21-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i64 400, i64* [[IT]], align 8 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11113,34 +11384,33 @@ // CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK21-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK21-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK21-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK21-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 // CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK21-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK21-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK21-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK21-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 -// CHECK21-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK21-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK21-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 -// CHECK21-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]]) +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK21-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK21-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK21-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP5]], align 8 +// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 +// CHECK21-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -11150,70 +11420,74 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK21-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK21-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK21-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK21-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK21-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK21-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK21-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK21-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK21-NEXT: store i64 6, i64* [[DOTOMP_UB]], align 8 // CHECK21-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK21-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK21-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK21-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK21-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK21-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK21-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK21-NEXT: store i64 [[TMP5]], i64* [[DOTOMP_IV]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK21-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_IV]], align 8 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !32 -// CHECK21-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !32 -// CHECK21-NEXT: [[CMP2:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] -// CHECK21-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK21-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !32 +// CHECK21-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !32 +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] +// CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !32 -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK21-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !32 +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK21-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !32 -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !32 -// CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK21-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !32 -// CHECK21-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !32 -// CHECK21-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 -// CHECK21-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 -// CHECK21-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK21-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !32 -// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 -// CHECK21-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK21-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !32 +// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK21-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !32 +// CHECK21-NEXT: [[TMP16:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !32 +// CHECK21-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK21-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK21-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2, !llvm.access.group !32 +// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 2 +// CHECK21-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 +// CHECK21-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK21-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !32 -// CHECK21-NEXT: [[ADD8:%.*]] = add nsw i64 [[TMP12]], 1 -// CHECK21-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !32 +// CHECK21-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !32 +// CHECK21-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 +// CHECK21-NEXT: store i64 [[ADD6]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !32 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK21-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK21-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK21-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i64 11, i64* [[I]], align 8 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11224,15 +11498,17 @@ // CHECK23-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96 // CHECK23-SAME: () #[[ATTR0:[0-9]+]] { // CHECK23-NEXT: entry: -// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11242,52 +11518,54 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK23-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 5, i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK23-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK23-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK23-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK23-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK23-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 33, i32* [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11301,35 +11579,33 @@ // CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[LIN_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK23-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK23-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK23-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK23-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK23-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK23-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK23-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[LIN_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP2]], i32* [[LIN_CASTED]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, i32* [[LIN_CASTED]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP4]], i32* [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP1]], i32 [[TMP3]], i32 [[TMP5]]) +// CHECK23-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK23-NEXT: store i16 [[TMP1]], i16* [[TMP0]], align 4 +// CHECK23-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP3:%.*]] = load i32, i32* [[LIN_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK23-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[LIN:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK23-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK23-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK23-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK23-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -11344,96 +11620,103 @@ // CHECK23-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK23-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK23-NEXT: [[TMP0:%.*]] = load i32, i32* [[LIN_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START1]], align 4 +// CHECK23-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 4 +// CHECK23-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK23-NEXT: store i32 [[TMP4]], i32* [[LIN]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK23-NEXT: store i32 [[TMP6]], i32* [[A]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[LIN]], align 4 +// CHECK23-NEXT: store i32 [[TMP7]], i32* [[DOTLINEAR_START]], align 4 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK23-NEXT: store i32 [[TMP8]], i32* [[DOTLINEAR_START1]], align 4 // CHECK23-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() #[[ATTR5:[0-9]+]] // CHECK23-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 // CHECK23-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK23-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK23-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK23-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP3]]) -// CHECK23-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK23-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK23-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK23-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK23-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP10]]) +// CHECK23-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK23-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK23-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK23-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK23-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK23-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK23-NEXT: store i64 [[TMP6]], i64* [[DOTOMP_IV]], align 8 +// CHECK23-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK23-NEXT: store i64 [[TMP13]], i64* [[DOTOMP_IV]], align 8 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 -// CHECK23-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !18 -// CHECK23-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] +// CHECK23-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 +// CHECK23-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !18 +// CHECK23-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] // CHECK23-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 -// CHECK23-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK23-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 +// CHECK23-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK23-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK23-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !18 -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4, !llvm.access.group !18 -// CHECK23-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK23-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 -// CHECK23-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !18 -// CHECK23-NEXT: [[MUL6:%.*]] = mul i64 [[TMP11]], [[TMP12]] -// CHECK23-NEXT: [[ADD:%.*]] = add i64 [[CONV5]], [[MUL6]] -// CHECK23-NEXT: [[CONV7:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK23-NEXT: store i32 [[CONV7]], i32* [[LIN2]], align 4, !llvm.access.group !18 -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4, !llvm.access.group !18 -// CHECK23-NEXT: [[CONV8:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK23-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 -// CHECK23-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !18 -// CHECK23-NEXT: [[MUL9:%.*]] = mul i64 [[TMP14]], [[TMP15]] -// CHECK23-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] -// CHECK23-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 -// CHECK23-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4, !llvm.access.group !18 -// CHECK23-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !18 -// CHECK23-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK23-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 -// CHECK23-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK23-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !18 +// CHECK23-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4, !llvm.access.group !18 +// CHECK23-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK23-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 +// CHECK23-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !18 +// CHECK23-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] +// CHECK23-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] +// CHECK23-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK23-NEXT: store i32 [[CONV6]], i32* [[LIN2]], align 4, !llvm.access.group !18 +// CHECK23-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4, !llvm.access.group !18 +// CHECK23-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK23-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 +// CHECK23-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !18 +// CHECK23-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] +// CHECK23-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] +// CHECK23-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 +// CHECK23-NEXT: store i32 [[CONV10]], i32* [[A3]], align 4, !llvm.access.group !18 +// CHECK23-NEXT: [[TMP23:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !18 +// CHECK23-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 +// CHECK23-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 +// CHECK23-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 +// CHECK23-NEXT: store i16 [[CONV13]], i16* [[AA]], align 2, !llvm.access.group !18 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 -// CHECK23-NEXT: [[ADD15:%.*]] = add i64 [[TMP17]], 1 -// CHECK23-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 +// CHECK23-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 +// CHECK23-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 +// CHECK23-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK23-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK23-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK23-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK23-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i64 400, i64* [[IT]], align 8 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK23: .omp.final.done: -// CHECK23-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK23-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK23-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK23-NEXT: br i1 [[TMP28]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK23: .omp.linear.pu: -// CHECK23-NEXT: [[TMP22:%.*]] = load i32, i32* [[LIN2]], align 4 -// CHECK23-NEXT: store i32 [[TMP22]], i32* [[LIN_ADDR]], align 4 -// CHECK23-NEXT: [[TMP23:%.*]] = load i32, i32* [[A3]], align 4 -// CHECK23-NEXT: store i32 [[TMP23]], i32* [[A_ADDR]], align 4 +// CHECK23-NEXT: [[TMP29:%.*]] = load i32, i32* [[LIN2]], align 4 +// CHECK23-NEXT: store i32 [[TMP29]], i32* [[LIN]], align 4 +// CHECK23-NEXT: [[TMP30:%.*]] = load i32, i32* [[A3]], align 4 +// CHECK23-NEXT: store i32 [[TMP30]], i32* [[A]], align 4 // CHECK23-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK23: .omp.linear.pu.done: // CHECK23-NEXT: ret void @@ -11450,29 +11733,28 @@ // CHECK23-NEXT: entry: // CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK23-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK23-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK23-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK23-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK23-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK23-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32 [[TMP1]], i32 [[TMP3]]) +// CHECK23-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK23-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK23-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK23-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11482,64 +11764,69 @@ // CHECK23-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK23-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK23-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK23-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK23-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK23-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 3, i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK23-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 -// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] -// CHECK23-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK23-NEXT: store i16 [[CONV2]], i16* [[IT]], align 2, !llvm.access.group !21 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !21 -// CHECK23-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK23-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !21 -// CHECK23-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !21 -// CHECK23-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 -// CHECK23-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 -// CHECK23-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK23-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2, !llvm.access.group !21 +// CHECK23-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK23-NEXT: store i16 [[CONV]], i16* [[IT]], align 2, !llvm.access.group !21 +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !21 +// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK23-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !21 +// CHECK23-NEXT: [[TMP14:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !21 +// CHECK23-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK23-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 +// CHECK23-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 +// CHECK23-NEXT: store i16 [[CONV5]], i16* [[AA]], align 2, !llvm.access.group !21 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK23-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK23-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK23-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK23-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK23-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK23-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK23-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i16 22, i16* [[IT]], align 2 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11560,8 +11847,7 @@ // CHECK23-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 // CHECK23-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 // CHECK23-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK23-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK23-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 // CHECK23-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -11580,31 +11866,40 @@ // CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 // CHECK23-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 // CHECK23-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP8]], i32* [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 10, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [10 x float]*, i32, float*, [5 x [10 x double]]*, i32, i32, double*, %struct.TT*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP9]], [10 x float]* [[TMP0]], i32 [[TMP1]], float* [[TMP2]], [5 x [10 x double]]* [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], double* [[TMP6]], %struct.TT* [[TMP7]], i32 [[TMP11]]) +// CHECK23-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: store [10 x float]* [[TMP0]], [10 x float]** [[TMP10]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: store i32 [[TMP1]], i32* [[TMP11]], align 4 +// CHECK23-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK23-NEXT: store float* [[TMP2]], float** [[TMP12]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK23-NEXT: store [5 x [10 x double]]* [[TMP3]], [5 x [10 x double]]** [[TMP13]], align 4 +// CHECK23-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK23-NEXT: store i32 [[TMP4]], i32* [[TMP14]], align 4 +// CHECK23-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK23-NEXT: store i32 [[TMP5]], i32* [[TMP15]], align 4 +// CHECK23-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK23-NEXT: store double* [[TMP6]], double** [[TMP16]], align 4 +// CHECK23-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK23-NEXT: store %struct.TT* [[TMP7]], %struct.TT** [[TMP17]], align 4 +// CHECK23-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK23-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4 +// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4 -// CHECK23-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4 -// CHECK23-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4 -// CHECK23-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 -// CHECK23-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 -// CHECK23-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK23-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11614,125 +11909,131 @@ // CHECK23-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK23-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK23-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK23-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4 -// CHECK23-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK23-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4 -// CHECK23-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4 -// CHECK23-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 -// CHECK23-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 +// CHECK23-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK23-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load [10 x float]*, [10 x float]** [[TMP3]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK23-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[TMP9]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 5 +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 6 +// CHECK23-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK23-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 7 +// CHECK23-NEXT: [[TMP16:%.*]] = load double*, double** [[TMP15]], align 4 +// CHECK23-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 8 +// CHECK23-NEXT: [[TMP18:%.*]] = load %struct.TT*, %struct.TT** [[TMP17]], align 4 +// CHECK23-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 9 +// CHECK23-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK23-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK23-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK23-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK23-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK23: omp.dispatch.cond: -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK23-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK23-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK23-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK23: omp.dispatch.body: // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK23-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 -// CHECK23-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK23-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK23-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK23-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 +// CHECK23-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK23-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK23-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK23-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK23-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 // CHECK23-NEXT: store i8 [[CONV]], i8* [[IT]], align 1, !llvm.access.group !24 -// CHECK23-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !24 -// CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK23-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4, !llvm.access.group !24 -// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK23-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !24 -// CHECK23-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double +// CHECK23-NEXT: [[TMP32:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !24 +// CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK23-NEXT: store i32 [[ADD]], i32* [[A]], align 4, !llvm.access.group !24 +// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP33:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !24 +// CHECK23-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK23-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK23-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK23-NEXT: store float [[CONV5]], float* [[ARRAYIDX]], align 4, !llvm.access.group !24 +// CHECK23-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP8]], i32 3 +// CHECK23-NEXT: [[TMP34:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !llvm.access.group !24 +// CHECK23-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double // CHECK23-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK23-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK23-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !24 -// CHECK23-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK23-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !24 -// CHECK23-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK23-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK23-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK23-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !24 -// CHECK23-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 -// CHECK23-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i32 0, i32 2 -// CHECK23-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !24 -// CHECK23-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK23-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !24 -// CHECK23-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK23-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP23]] -// CHECK23-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i32 3 -// CHECK23-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !24 -// CHECK23-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK23-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !24 -// CHECK23-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK23-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !24 -// CHECK23-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK23-NEXT: store i64 [[ADD20]], i64* [[X]], align 4, !llvm.access.group !24 -// CHECK23-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK23-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !24 -// CHECK23-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK23-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK23-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK23-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4, !llvm.access.group !24 +// CHECK23-NEXT: store float [[CONV9]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !24 +// CHECK23-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP10]], i32 0, i32 1 +// CHECK23-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX10]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP35:%.*]] = load double, double* [[ARRAYIDX11]], align 8, !llvm.access.group !24 +// CHECK23-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK23-NEXT: store double [[ADD12]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !24 +// CHECK23-NEXT: [[TMP36:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK23-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP16]], i32 [[TMP36]] +// CHECK23-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX13]], i32 3 +// CHECK23-NEXT: [[TMP37:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !24 +// CHECK23-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK23-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8, !llvm.access.group !24 +// CHECK23-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP18]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP38:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !24 +// CHECK23-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK23-NEXT: store i64 [[ADD16]], i64* [[X]], align 4, !llvm.access.group !24 +// CHECK23-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP18]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP39:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !24 +// CHECK23-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK23-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK23-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK23-NEXT: store i8 [[CONV19]], i8* [[Y]], align 4, !llvm.access.group !24 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK23-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK23-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK23-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK23-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK23-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK23: omp.dispatch.inc: -// CHECK23-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK23-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK23-NEXT: store i32 [[ADD25]], i32* [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK23-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK23-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK23-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK23-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK23-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK23-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK23: omp.dispatch.end: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) -// CHECK23-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK23-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK23-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK23-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i8 96, i8* [[IT]], align 1 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11747,9 +12048,7 @@ // CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK23-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK23-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK23-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK23-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 @@ -11757,41 +12056,47 @@ // CHECK23-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK23-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* // CHECK23-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK23-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK23-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 -// CHECK23-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* -// CHECK23-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 -// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]]) +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK23-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV1]], align 1 +// CHECK23-NEXT: store i8 [[TMP6]], i8* [[TMP5]], align 2 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK23-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP7]], align 4 +// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK23-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK23-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 -// CHECK23-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK23-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK23-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* -// CHECK23-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK23-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK23-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK23-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 2 +// CHECK23-NEXT: store i8 [[TMP6]], i8* [[AAA]], align 1 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK23-NEXT: [[TMP8:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP7]], align 4 // CHECK23-NEXT: ret void // // @@ -11804,8 +12109,7 @@ // CHECK23-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 // CHECK23-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK23-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) @@ -11820,26 +12124,33 @@ // CHECK23-NEXT: [[TMP3:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 // CHECK23-NEXT: [[TMP4:%.*]] = load i16*, i16** [[C_ADDR]], align 4 // CHECK23-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP5]], i32* [[B_CASTED]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK23-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1 -// CHECK23-NEXT: [[CONV3:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP5]], align 4 +// CHECK23-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK23-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: store i32 [[TMP2]], i32* [[TMP8]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK23-NEXT: store i32 [[TMP3]], i32* [[TMP9]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK23-NEXT: store i16* [[TMP4]], i16** [[TMP10]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK23-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK23-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK23-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK23-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP9]] to i1 -// CHECK23-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK23-NEXT: store i8 [[FROMBOOL]], i8* [[TMP11]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK23-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK23-NEXT: br i1 [[TOBOOL3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK23: omp_if.then: -// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]], i32 [[TMP6]], i32 [[TMP2]], i32 [[TMP3]], i16* [[TMP4]], i32 [[TMP8]]) +// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: br label [[OMP_IF_END:%.*]] // CHECK23: omp_if.else: // CHECK23-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK23-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK23-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK23-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP6]], i32 [[TMP2]], i32 [[TMP3]], i16* [[TMP4]], i32 [[TMP8]]) #[[ATTR2:[0-9]+]] +// CHECK23-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2:[0-9]+]] // CHECK23-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK23-NEXT: br label [[OMP_IF_END]] // CHECK23: omp_if.end: @@ -11847,16 +12158,13 @@ // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK23-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 -// CHECK23-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 -// CHECK23-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK23-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK23-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -11866,137 +12174,144 @@ // CHECK23-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK23-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK23-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 -// CHECK23-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK23-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK23-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load i16*, i16** [[TMP9]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK23-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP11]], align 4 +// CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 +// CHECK23-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK23-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK23-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK23-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK23-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 -// CHECK23-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK23-NEXT: [[TMP13:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK23-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK23-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK23: omp_if.then: -// CHECK23-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK23-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK23-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP7]], 3 +// CHECK23-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK23-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK23-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP16]], 3 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK23-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK23-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK23-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK23-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_IV]], align 8 +// CHECK23-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK23-NEXT: store i64 [[TMP18]], i64* [[DOTOMP_IV]], align 8 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !27 -// CHECK23-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !27 -// CHECK23-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP10]], [[TMP11]] -// CHECK23-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK23-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !27 +// CHECK23-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !27 +// CHECK23-NEXT: [[CMP2:%.*]] = icmp ule i64 [[TMP19]], [[TMP20]] +// CHECK23-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !27 -// CHECK23-NEXT: [[MUL:%.*]] = mul i64 [[TMP12]], 400 +// CHECK23-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !27 +// CHECK23-NEXT: [[MUL:%.*]] = mul i64 [[TMP21]], 400 // CHECK23-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK23-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !27 -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, i32* [[B_ADDR]], align 4, !llvm.access.group !27 -// CHECK23-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP13]] to double -// CHECK23-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 -// CHECK23-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP22:%.*]] = load i32, i32* [[B]], align 4, !llvm.access.group !27 +// CHECK23-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP22]] to double +// CHECK23-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK23-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK23-NEXT: store double [[ADD]], double* [[A]], align 4, !nontemporal !28, !llvm.access.group !27 -// CHECK23-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK23-NEXT: [[TMP14:%.*]] = load double, double* [[A5]], align 4, !nontemporal !28, !llvm.access.group !27 -// CHECK23-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK23-NEXT: store double [[INC]], double* [[A5]], align 4, !nontemporal !28, !llvm.access.group !27 -// CHECK23-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 -// CHECK23-NEXT: [[TMP15:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP15]] -// CHECK23-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 -// CHECK23-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2, !llvm.access.group !27 +// CHECK23-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP23:%.*]] = load double, double* [[A3]], align 4, !nontemporal !28, !llvm.access.group !27 +// CHECK23-NEXT: [[INC:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// CHECK23-NEXT: store double [[INC]], double* [[A3]], align 4, !nontemporal !28, !llvm.access.group !27 +// CHECK23-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// CHECK23-NEXT: [[TMP24:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i32 [[TMP24]] +// CHECK23-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// CHECK23-NEXT: store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2, !llvm.access.group !27 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !27 -// CHECK23-NEXT: [[ADD8:%.*]] = add i64 [[TMP16]], 1 -// CHECK23-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !27 +// CHECK23-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !27 +// CHECK23-NEXT: [[ADD6:%.*]] = add i64 [[TMP25]], 1 +// CHECK23-NEXT: store i64 [[ADD6]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !27 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_IF_END:%.*]] // CHECK23: omp_if.else: -// CHECK23-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK23-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK23-NEXT: [[CMP9:%.*]] = icmp ugt i64 [[TMP19]], 3 -// CHECK23-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] -// CHECK23: cond.true10: -// CHECK23-NEXT: br label [[COND_END12:%.*]] -// CHECK23: cond.false11: -// CHECK23-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK23-NEXT: br label [[COND_END12]] -// CHECK23: cond.end12: -// CHECK23-NEXT: [[COND13:%.*]] = phi i64 [ 3, [[COND_TRUE10]] ], [ [[TMP20]], [[COND_FALSE11]] ] -// CHECK23-NEXT: store i64 [[COND13]], i64* [[DOTOMP_UB]], align 8 -// CHECK23-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK23-NEXT: store i64 [[TMP21]], i64* [[DOTOMP_IV]], align 8 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND14:%.*]] -// CHECK23: omp.inner.for.cond14: -// CHECK23-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK23-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK23-NEXT: [[CMP15:%.*]] = icmp ule i64 [[TMP22]], [[TMP23]] -// CHECK23-NEXT: br i1 [[CMP15]], label [[OMP_INNER_FOR_BODY16:%.*]], label [[OMP_INNER_FOR_END30:%.*]] -// CHECK23: omp.inner.for.body16: -// CHECK23-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK23-NEXT: [[MUL17:%.*]] = mul i64 [[TMP24]], 400 -// CHECK23-NEXT: [[SUB18:%.*]] = sub i64 2000, [[MUL17]] -// CHECK23-NEXT: store i64 [[SUB18]], i64* [[IT]], align 8 -// CHECK23-NEXT: [[TMP25:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK23-NEXT: [[CONV19:%.*]] = sitofp i32 [[TMP25]] to double -// CHECK23-NEXT: [[ADD20:%.*]] = fadd double [[CONV19]], 1.500000e+00 -// CHECK23-NEXT: [[A21:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK23-NEXT: store double [[ADD20]], double* [[A21]], align 4 -// CHECK23-NEXT: [[A22:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK23-NEXT: [[TMP26:%.*]] = load double, double* [[A22]], align 4 -// CHECK23-NEXT: [[INC23:%.*]] = fadd double [[TMP26]], 1.000000e+00 -// CHECK23-NEXT: store double [[INC23]], double* [[A22]], align 4 -// CHECK23-NEXT: [[CONV24:%.*]] = fptosi double [[INC23]] to i16 -// CHECK23-NEXT: [[TMP27:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK23-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP27]] -// CHECK23-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX25]], i32 1 -// CHECK23-NEXT: store i16 [[CONV24]], i16* [[ARRAYIDX26]], align 2 -// CHECK23-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]] -// CHECK23: omp.body.continue27: -// CHECK23-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] -// CHECK23: omp.inner.for.inc28: -// CHECK23-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK23-NEXT: [[ADD29:%.*]] = add i64 [[TMP28]], 1 -// CHECK23-NEXT: store i64 [[ADD29]], i64* [[DOTOMP_IV]], align 8 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND14]], !llvm.loop [[LOOP31:![0-9]+]] -// CHECK23: omp.inner.for.end30: +// CHECK23-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK23-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK23-NEXT: [[CMP7:%.*]] = icmp ugt i64 [[TMP28]], 3 +// CHECK23-NEXT: br i1 [[CMP7]], label [[COND_TRUE8:%.*]], label [[COND_FALSE9:%.*]] +// CHECK23: cond.true8: +// CHECK23-NEXT: br label [[COND_END10:%.*]] +// CHECK23: cond.false9: +// CHECK23-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK23-NEXT: br label [[COND_END10]] +// CHECK23: cond.end10: +// CHECK23-NEXT: [[COND11:%.*]] = phi i64 [ 3, [[COND_TRUE8]] ], [ [[TMP29]], [[COND_FALSE9]] ] +// CHECK23-NEXT: store i64 [[COND11]], i64* [[DOTOMP_UB]], align 8 +// CHECK23-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK23-NEXT: store i64 [[TMP30]], i64* [[DOTOMP_IV]], align 8 +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND12:%.*]] +// CHECK23: omp.inner.for.cond12: +// CHECK23-NEXT: [[TMP31:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK23-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK23-NEXT: [[CMP13:%.*]] = icmp ule i64 [[TMP31]], [[TMP32]] +// CHECK23-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY14:%.*]], label [[OMP_INNER_FOR_END28:%.*]] +// CHECK23: omp.inner.for.body14: +// CHECK23-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK23-NEXT: [[MUL15:%.*]] = mul i64 [[TMP33]], 400 +// CHECK23-NEXT: [[SUB16:%.*]] = sub i64 2000, [[MUL15]] +// CHECK23-NEXT: store i64 [[SUB16]], i64* [[IT]], align 8 +// CHECK23-NEXT: [[TMP34:%.*]] = load i32, i32* [[B]], align 4 +// CHECK23-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP34]] to double +// CHECK23-NEXT: [[ADD18:%.*]] = fadd double [[CONV17]], 1.500000e+00 +// CHECK23-NEXT: [[A19:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK23-NEXT: store double [[ADD18]], double* [[A19]], align 4 +// CHECK23-NEXT: [[A20:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP35:%.*]] = load double, double* [[A20]], align 4 +// CHECK23-NEXT: [[INC21:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK23-NEXT: store double [[INC21]], double* [[A20]], align 4 +// CHECK23-NEXT: [[CONV22:%.*]] = fptosi double [[INC21]] to i16 +// CHECK23-NEXT: [[TMP36:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK23-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i32 [[TMP36]] +// CHECK23-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX23]], i32 1 +// CHECK23-NEXT: store i16 [[CONV22]], i16* [[ARRAYIDX24]], align 2 +// CHECK23-NEXT: br label [[OMP_BODY_CONTINUE25:%.*]] +// CHECK23: omp.body.continue25: +// CHECK23-NEXT: br label [[OMP_INNER_FOR_INC26:%.*]] +// CHECK23: omp.inner.for.inc26: +// CHECK23-NEXT: [[TMP37:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK23-NEXT: [[ADD27:%.*]] = add i64 [[TMP37]], 1 +// CHECK23-NEXT: store i64 [[ADD27]], i64* [[DOTOMP_IV]], align 8 +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND12]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK23: omp.inner.for.end28: // CHECK23-NEXT: br label [[OMP_IF_END]] // CHECK23: omp_if.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK23-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK23-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) +// CHECK23-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK23-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i64 400, i64* [[IT]], align 8 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12010,32 +12325,32 @@ // CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK23-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK23-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK23-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK23-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK23-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK23-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK23-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK23-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]]) +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK23-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP5]], align 4 +// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 +// CHECK23-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK23-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -12045,69 +12360,74 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK23-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK23-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK23-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK23-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK23-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK23-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK23-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK23-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK23-NEXT: store i64 6, i64* [[DOTOMP_UB]], align 8 // CHECK23-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK23-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK23-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK23-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK23-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK23-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK23-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK23-NEXT: store i64 [[TMP5]], i64* [[DOTOMP_IV]], align 8 +// CHECK23-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK23-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_IV]], align 8 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !33 -// CHECK23-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !33 -// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] +// CHECK23-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !33 +// CHECK23-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !33 +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !33 -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK23-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !33 +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK23-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !33 -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !33 -// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK23-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !33 -// CHECK23-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !33 -// CHECK23-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 -// CHECK23-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 -// CHECK23-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK23-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !33 -// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 -// CHECK23-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK23-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !33 +// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK23-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !33 +// CHECK23-NEXT: [[TMP16:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !33 +// CHECK23-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK23-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK23-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK23-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2, !llvm.access.group !33 +// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 +// CHECK23-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK23-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !33 -// CHECK23-NEXT: [[ADD7:%.*]] = add nsw i64 [[TMP12]], 1 -// CHECK23-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !33 +// CHECK23-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !33 +// CHECK23-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 +// CHECK23-NEXT: store i64 [[ADD6]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !33 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK23-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK23-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK23-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i64 11, i64* [[I]], align 8 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/target_parallel_if_codegen.cpp b/clang/test/OpenMP/target_parallel_if_codegen.cpp --- a/clang/test/OpenMP/target_parallel_if_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_if_codegen.cpp @@ -488,7 +488,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -498,21 +498,22 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]], i64 [[TMP3]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP3]]) #[[ATTR3]] +// CHECK1-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: @@ -520,22 +521,25 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double -// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double +// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double [[ADD]], double* [[A]], align 8 // CHECK1-NEXT: ret void // @@ -545,6 +549,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -552,17 +557,19 @@ // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR3]] +// CHECK1-NEXT: call void @.omp_outlined..1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: @@ -570,16 +577,18 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 8 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double 2.500000e+00, double* [[A]], align 8 // CHECK1-NEXT: ret void // @@ -588,6 +597,7 @@ // CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -597,13 +607,13 @@ // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR3]] +// CHECK1-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: @@ -611,29 +621,36 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l108 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -641,37 +658,40 @@ // CHECK1-SAME: (i64 noundef [[A:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]]) #[[ATTR3]] +// CHECK1-NEXT: call void @.omp_outlined..8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[A]], align 4 // CHECK1-NEXT: ret void // // @@ -680,42 +700,44 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK1-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], i16* [[B]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[B]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[A]], align 4 // CHECK1-NEXT: ret void // // @@ -1029,7 +1051,7 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -1038,20 +1060,22 @@ // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]], i32 [[TMP3]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP3]]) #[[ATTR3]] +// CHECK3-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: @@ -1059,21 +1083,25 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double [[ADD]], double* [[A]], align 4 // CHECK3-NEXT: ret void // @@ -1083,6 +1111,7 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -1090,17 +1119,19 @@ // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR3]] +// CHECK3-NEXT: call void @.omp_outlined..1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: @@ -1108,16 +1139,18 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 4 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double 2.500000e+00, double* [[A]], align 4 // CHECK3-NEXT: ret void // @@ -1126,6 +1159,7 @@ // CHECK3-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -1135,13 +1169,13 @@ // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR3]] +// CHECK3-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: @@ -1149,29 +1183,36 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l108 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -1179,34 +1220,39 @@ // CHECK3-SAME: (i32 noundef [[A:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32 [[TMP2]]) #[[ATTR3]] +// CHECK3-NEXT: call void @.omp_outlined..8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[A]], align 4 // CHECK3-NEXT: ret void // // @@ -1215,39 +1261,43 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[B_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP1]], i32 [[TMP3]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK3-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], i16* [[B]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, i16* [[B]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[A]], align 4 // CHECK3-NEXT: ret void // // @@ -1262,6 +1312,7 @@ // CHECK9-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK9-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) @@ -1271,13 +1322,13 @@ // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] +// CHECK9-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2:[0-9]+]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: @@ -1285,29 +1336,36 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l108 // CHECK9-SAME: () #[[ATTR0]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1317,7 +1375,7 @@ // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -1327,21 +1385,22 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 -// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 +// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]], i64 [[TMP3]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP3]]) #[[ATTR2]] +// CHECK9-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: @@ -1349,22 +1408,25 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK9-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double -// CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double +// CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK9-NEXT: store double [[ADD]], double* [[A]], align 8 // CHECK9-NEXT: ret void // @@ -1374,6 +1436,7 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -1381,17 +1444,19 @@ // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK9-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR2]] +// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: @@ -1399,16 +1464,18 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 8 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK9-NEXT: store double 2.500000e+00, double* [[A]], align 8 // CHECK9-NEXT: ret void // @@ -1417,37 +1484,40 @@ // CHECK9-SAME: (i64 noundef [[A:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK9-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]]) #[[ATTR2]] +// CHECK9-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK9-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[A]], align 4 // CHECK9-NEXT: ret void // // @@ -1456,42 +1526,44 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i16* -// CHECK9-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP3]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK9-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK9-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], i16* [[B]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = load i16, i16* [[B]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[A]], align 4 // CHECK9-NEXT: ret void // // @@ -1499,6 +1571,7 @@ // CHECK11-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK11-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) @@ -1508,13 +1581,13 @@ // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] +// CHECK11-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2:[0-9]+]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: @@ -1522,29 +1595,36 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l108 // CHECK11-SAME: () #[[ATTR0]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -1554,7 +1634,7 @@ // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -1563,20 +1643,22 @@ // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]], i32 [[TMP3]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP3]]) #[[ATTR2]] +// CHECK11-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: @@ -1584,21 +1666,25 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK11-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK11-NEXT: store double [[ADD]], double* [[A]], align 4 // CHECK11-NEXT: ret void // @@ -1608,6 +1694,7 @@ // CHECK11-NEXT: entry: // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -1615,17 +1702,19 @@ // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR2]] +// CHECK11-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: @@ -1633,16 +1722,18 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 4 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK11-NEXT: store double 2.500000e+00, double* [[A]], align 4 // CHECK11-NEXT: ret void // @@ -1651,34 +1742,39 @@ // CHECK11-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32 [[TMP2]]) #[[ATTR2]] +// CHECK11-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK11-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[A]], align 4 // CHECK11-NEXT: ret void // // @@ -1687,38 +1783,42 @@ // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[B_CASTED]] to i16* -// CHECK11-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP1]], i32 [[TMP3]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK11-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], i16* [[B]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = load i16, i16* [[B]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[A]], align 4 // CHECK11-NEXT: ret void // diff --git a/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp b/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp --- a/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp @@ -457,7 +457,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -467,31 +467,35 @@ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]], i64 [[TMP4]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double -// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double +// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double [[ADD]], double* [[A]], align 8 // CHECK1-NEXT: ret void // @@ -500,25 +504,30 @@ // CHECK1-SAME: (%struct.S1* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1024) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP2]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 8 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double 2.500000e+00, double* [[A]], align 8 // CHECK1-NEXT: ret void // @@ -527,22 +536,26 @@ // CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -550,41 +563,49 @@ // CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 20) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -594,8 +615,7 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -606,36 +626,39 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK1-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP4]], i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK1-NEXT: store i16 [[TMP6]], i16* [[TMP5]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], i16* [[B]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[B]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[A]], align 4 // CHECK1-NEXT: ret void // // @@ -909,7 +932,7 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -917,29 +940,35 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double [[ADD]], double* [[A]], align 4 // CHECK3-NEXT: ret void // @@ -948,25 +977,30 @@ // CHECK3-SAME: (%struct.S1* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1024) -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP2]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 4 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double 2.500000e+00, double* [[A]], align 4 // CHECK3-NEXT: ret void // @@ -975,21 +1009,25 @@ // CHECK3-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -997,40 +1035,48 @@ // CHECK3-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 20) -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -1040,8 +1086,7 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -1051,34 +1096,39 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK3-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP4]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK3-NEXT: store i16 [[TMP6]], i16* [[TMP5]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], i16* [[B]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, i16* [[B]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[A]], align 4 // CHECK3-NEXT: ret void // // @@ -1093,22 +1143,26 @@ // CHECK9-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1116,22 +1170,26 @@ // CHECK9-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1141,7 +1199,7 @@ // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -1151,31 +1209,35 @@ // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]], i64 [[TMP4]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK9-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double -// CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double +// CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK9-NEXT: store double [[ADD]], double* [[A]], align 8 // CHECK9-NEXT: ret void // @@ -1184,25 +1246,30 @@ // CHECK9-SAME: (%struct.S1* noundef [[THIS:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1024) -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP2]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 8 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK9-NEXT: store double 2.500000e+00, double* [[A]], align 8 // CHECK9-NEXT: ret void // @@ -1210,19 +1277,23 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK9-SAME: () #[[ATTR0]] { // CHECK9-NEXT: entry: +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 20) -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1232,8 +1303,7 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -1244,36 +1314,39 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK9-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* -// CHECK9-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP4]], i64 [[TMP6]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK9-NEXT: store i16 [[TMP6]], i16* [[TMP5]], align 4 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK9-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], i16* [[B]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = load i16, i16* [[B]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[A]], align 4 // CHECK9-NEXT: ret void // // @@ -1281,21 +1354,25 @@ // CHECK11-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -1303,21 +1380,25 @@ // CHECK11-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -1327,7 +1408,7 @@ // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK11-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -1335,29 +1416,35 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK11-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK11-NEXT: store double [[ADD]], double* [[A]], align 4 // CHECK11-NEXT: ret void // @@ -1366,25 +1453,30 @@ // CHECK11-SAME: (%struct.S1* noundef [[THIS:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK11-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1024) -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP2]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 4 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK11-NEXT: store double 2.500000e+00, double* [[A]], align 4 // CHECK11-NEXT: ret void // @@ -1392,19 +1484,23 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK11-SAME: () #[[ATTR0]] { // CHECK11-NEXT: entry: +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK11-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 20) -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -1414,8 +1510,7 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -1425,33 +1520,38 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK11-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK11-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* -// CHECK11-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP4]], i32 [[TMP6]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK11-NEXT: store i16 [[TMP6]], i16* [[TMP5]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], i16* [[B]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = load i16, i16* [[B]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[A]], align 4 // CHECK11-NEXT: ret void // diff --git a/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp b/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp --- a/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp @@ -52,238 +52,244 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 // CHECK1-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8**)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[TMP0]], i8** [[TMP1]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[TMP0]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: store i8** [[TMP3]], i8*** [[TMP2]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i8** noundef [[ARGV:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x i8*], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP24:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP23:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP1]], i64 0 -// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8*, i8** [[TMP5]], i64 9 -// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint i8* [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint i8* [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = add nuw i64 [[TMP10]], 1 -// CHECK1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = call i8* @llvm.stacksave() -// CHECK1-NEXT: store i8* [[TMP13]], i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP11]], align 16 -// CHECK1-NEXT: store i64 [[TMP11]], i64* [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[VLA]], [[TMP14]] +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: store i32 0, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP4]], i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8*, i8** [[TMP8]], i64 9 +// CHECK1-NEXT: [[TMP9:%.*]] = load i8*, i8** [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP9]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint i8* [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint i8* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP14:%.*]] = add nuw i64 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP16:%.*]] = call i8* @llvm.stacksave() +// CHECK1-NEXT: store i8* [[TMP16]], i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP14]], align 16 +// CHECK1-NEXT: store i64 [[TMP14]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[VLA]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP15:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint i8* [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint i8* [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP20]] -// CHECK1-NEXT: store i8** [[_TMP5]], i8*** [[TMP]], align 8 -// CHECK1-NEXT: store i8* [[TMP21]], i8** [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint i8* [[TMP19]] to i64 +// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint i8* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP22:%.*]] = sub i64 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP23:%.*]] = sdiv exact i64 [[TMP22]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP23]] +// CHECK1-NEXT: store i8** [[_TMP4]], i8*** [[TMP]], align 8 +// CHECK1-NEXT: store i8* [[TMP24]], i8** [[_TMP4]], align 8 // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i32* [[ARGC1]] to i8* -// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP25:%.*]] = bitcast i32* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, i64* [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init. to i8*), i8** [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store i8* null, i8** [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb. to i8*), i8** [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: [[TMP31:%.*]] = bitcast i32* [[TMP30]] to i8* -// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP31]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP33:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8*, i8** [[TMP33]], i64 0 -// CHECK1-NEXT: [[TMP34:%.*]] = load i8*, i8** [[ARRAYIDX7]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, i8* [[TMP34]], i64 0 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = sext i32 [[TMP35]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP36]] -// CHECK1-NEXT: [[TMP37:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i8*, i8** [[TMP37]], i64 9 -// CHECK1-NEXT: [[TMP38:%.*]] = load i8*, i8** [[ARRAYIDX10]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, i8* [[TMP38]], i64 [[LB_ADD_LEN9]] -// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 -// CHECK1-NEXT: store i8* [[ARRAYIDX8]], i8** [[TMP39]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint i8* [[ARRAYIDX11]] to i64 -// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint i8* [[ARRAYIDX8]] to i64 -// CHECK1-NEXT: [[TMP42:%.*]] = sub i64 [[TMP40]], [[TMP41]] -// CHECK1-NEXT: [[TMP43:%.*]] = sdiv exact i64 [[TMP42]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP44:%.*]] = add nuw i64 [[TMP43]], 1 -// CHECK1-NEXT: [[TMP45:%.*]] = mul nuw i64 [[TMP44]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP45]], i64* [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init..1 to i8*), i8** [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 -// CHECK1-NEXT: store i8* null, i8** [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb..2 to i8*), i8** [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 -// CHECK1-NEXT: store i32 1, i32* [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[TMP51]], align 4 -// CHECK1-NEXT: [[TMP53:%.*]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]] to i8* -// CHECK1-NEXT: [[TMP54:%.*]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @[[GLOB1]], i32 [[TMP52]], i32 0, i32 2, i8* [[TMP53]]) -// CHECK1-NEXT: store i8* [[TMP54]], i8** [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store i8** [[DOTTASK_RED_]], i8*** [[TMP55]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[ARGC1]], i32** [[TMP56]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP58:%.*]] = load i8**, i8*** [[TMP]], align 8 -// CHECK1-NEXT: store i8** [[TMP58]], i8*** [[TMP57]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = load i32, i32* [[TMP59]], align 4 -// CHECK1-NEXT: [[TMP61:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP60]], i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP62:%.*]] = bitcast i8* [[TMP61]] to %struct.kmp_task_t_with_privates* -// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP62]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP63]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP65:%.*]] = load i8*, i8** [[TMP64]], align 8 -// CHECK1-NEXT: [[TMP66:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP65]], i8* align 8 [[TMP66]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP62]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP67]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP69:%.*]] = load i8*, i8** [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: store i8* [[TMP69]], i8** [[TMP68]], align 8 -// CHECK1-NEXT: [[TMP70:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP71:%.*]] = load i32, i32* [[TMP70]], align 4 -// CHECK1-NEXT: [[TMP72:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP71]], i8* [[TMP61]]) +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i32* [[ARGC]] to i8* +// CHECK1-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP28:%.*]] = bitcast i32* [[TMP2]] to i8* +// CHECK1-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, i64* [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init. to i8*), i8** [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store i8* null, i8** [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb. to i8*), i8** [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to i8* +// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP34]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP36:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP36]], i64 0 +// CHECK1-NEXT: [[TMP37:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP37]], i64 0 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = sext i32 [[TMP38]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN8:%.*]] = add nsw i64 -1, [[TMP39]] +// CHECK1-NEXT: [[TMP40:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8*, i8** [[TMP40]], i64 9 +// CHECK1-NEXT: [[TMP41:%.*]] = load i8*, i8** [[ARRAYIDX9]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i8, i8* [[TMP41]], i64 [[LB_ADD_LEN8]] +// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 1 +// CHECK1-NEXT: store i8* [[ARRAYIDX7]], i8** [[TMP42]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = ptrtoint i8* [[ARRAYIDX10]] to i64 +// CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint i8* [[ARRAYIDX7]] to i64 +// CHECK1-NEXT: [[TMP45:%.*]] = sub i64 [[TMP43]], [[TMP44]] +// CHECK1-NEXT: [[TMP46:%.*]] = sdiv exact i64 [[TMP45]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP47:%.*]] = add nuw i64 [[TMP46]], 1 +// CHECK1-NEXT: [[TMP48:%.*]] = mul nuw i64 [[TMP47]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP48]], i64* [[TMP49]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 3 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init..1 to i8*), i8** [[TMP50]], align 8 +// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 4 +// CHECK1-NEXT: store i8* null, i8** [[TMP51]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 5 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb..2 to i8*), i8** [[TMP52]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 6 +// CHECK1-NEXT: store i32 1, i32* [[TMP53]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, i32* [[TMP54]], align 4 +// CHECK1-NEXT: [[TMP56:%.*]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]] to i8* +// CHECK1-NEXT: [[TMP57:%.*]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @[[GLOB1]], i32 [[TMP55]], i32 0, i32 2, i8* [[TMP56]]) +// CHECK1-NEXT: store i8* [[TMP57]], i8** [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store i8** [[DOTTASK_RED_]], i8*** [[TMP58]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[ARGC]], i32** [[TMP59]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP61:%.*]] = load i8**, i8*** [[TMP]], align 8 +// CHECK1-NEXT: store i8** [[TMP61]], i8*** [[TMP60]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = load i32, i32* [[TMP62]], align 4 +// CHECK1-NEXT: [[TMP64:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP63]], i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP65:%.*]] = bitcast i8* [[TMP64]] to %struct.kmp_task_t_with_privates* +// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP65]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP66]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP68:%.*]] = load i8*, i8** [[TMP67]], align 8 +// CHECK1-NEXT: [[TMP69:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP68]], i8* align 8 [[TMP69]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP65]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP70]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP72:%.*]] = load i8*, i8** [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: store i8* [[TMP72]], i8** [[TMP71]], align 8 // CHECK1-NEXT: [[TMP73:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP74:%.*]] = load i32, i32* [[TMP73]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP74]], i32 0) -// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP76:%.*]] = bitcast i32* [[ARGC1]] to i8* -// CHECK1-NEXT: store i8* [[TMP76]], i8** [[TMP75]], align 8 -// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP77]], align 8 -// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP79:%.*]] = inttoptr i64 [[TMP11]] to i8* +// CHECK1-NEXT: [[TMP75:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP74]], i8* [[TMP64]]) +// CHECK1-NEXT: [[TMP76:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP77:%.*]] = load i32, i32* [[TMP76]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP77]], i32 0) +// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP79:%.*]] = bitcast i32* [[ARGC]] to i8* // CHECK1-NEXT: store i8* [[TMP79]], i8** [[TMP78]], align 8 -// CHECK1-NEXT: [[TMP80:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP81:%.*]] = load i32, i32* [[TMP80]], align 4 -// CHECK1-NEXT: [[TMP82:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP83:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP81]], i32 2, i64 24, i8* [[TMP82]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP83]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP80:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP80]], align 8 +// CHECK1-NEXT: [[TMP81:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP82:%.*]] = inttoptr i64 [[TMP14]] to i8* +// CHECK1-NEXT: store i8* [[TMP82]], i8** [[TMP81]], align 8 +// CHECK1-NEXT: [[TMP83:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP84:%.*]] = load i32, i32* [[TMP83]], align 4 +// CHECK1-NEXT: [[TMP85:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP86:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP84]], i32 2, i64 24, i8* [[TMP85]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP86]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP84:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP85:%.*]] = load i32, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP84]], [[TMP85]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP86:%.*]] = getelementptr i8, i8* [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[ARRAYIDX2]], [[TMP86]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP87:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP88:%.*]] = load i32, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP87]], [[TMP88]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP89:%.*]] = getelementptr i8, i8* [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[ARRAYIDX1]], [[TMP89]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST12:%.*]] = phi i8* [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP87:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP87]] to i32 -// CHECK1-NEXT: [[TMP88:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP88]] to i32 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV]], [[CONV13]] -// CHECK1-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK1-NEXT: store i8 [[CONV15]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST12]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST11:%.*]] = phi i8* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP90:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP90]] to i32 +// CHECK1-NEXT: [[TMP91:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV12:%.*]] = sext i8 [[TMP91]] to i32 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV]], [[CONV12]] +// CHECK1-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i8 +// CHECK1-NEXT: store i8 [[CONV14]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP86]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done18: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP81]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP89]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done17: +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP84]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP89:%.*]] = load i32, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP90:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP89]] monotonic, align 4 -// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr i8, i8* [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq i8* [[ARRAYIDX2]], [[TMP91]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY19]], label [[OMP_ARRAYCPY_DONE32:%.*]], label [[OMP_ARRAYCPY_BODY20:%.*]] -// CHECK1: omp.arraycpy.body20: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST21:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT30:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST22:%.*]] = phi i8* [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT29:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP92:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1 -// CHECK1-NEXT: [[CONV23:%.*]] = sext i8 [[TMP92]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST22]] monotonic, align 1 +// CHECK1-NEXT: [[TMP92:%.*]] = load i32, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP93:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP92]] monotonic, align 4 +// CHECK1-NEXT: [[TMP94:%.*]] = getelementptr i8, i8* [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY18:%.*]] = icmp eq i8* [[ARRAYIDX1]], [[TMP94]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY18]], label [[OMP_ARRAYCPY_DONE31:%.*]], label [[OMP_ARRAYCPY_BODY19:%.*]] +// CHECK1: omp.arraycpy.body19: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST20:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT29:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST21:%.*]] = phi i8* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT28:%.*]], [[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[TMP95:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 1 +// CHECK1-NEXT: [[CONV22:%.*]] = sext i8 [[TMP95]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST21]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP93:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY20]] ], [ [[TMP98:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP93]], i8* [[_TMP24]], align 1 -// CHECK1-NEXT: [[TMP94:%.*]] = load i8, i8* [[_TMP24]], align 1 -// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP94]] to i32 -// CHECK1-NEXT: [[TMP95:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1 -// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP95]] to i32 -// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[CONV25]], [[CONV26]] -// CHECK1-NEXT: [[CONV28:%.*]] = trunc i32 [[ADD27]] to i8 -// CHECK1-NEXT: store i8 [[CONV28]], i8* [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP96:%.*]] = load i8, i8* [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP97:%.*]] = cmpxchg i8* [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i8 [[TMP93]], i8 [[TMP96]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP98]] = extractvalue { i8, i1 } [[TMP97]], 0 -// CHECK1-NEXT: [[TMP99:%.*]] = extractvalue { i8, i1 } [[TMP97]], 1 -// CHECK1-NEXT: br i1 [[TMP99]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP96:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY19]] ], [ [[TMP101:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP96]], i8* [[_TMP23]], align 1 +// CHECK1-NEXT: [[TMP97:%.*]] = load i8, i8* [[_TMP23]], align 1 +// CHECK1-NEXT: [[CONV24:%.*]] = sext i8 [[TMP97]] to i32 +// CHECK1-NEXT: [[TMP98:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 1 +// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP98]] to i32 +// CHECK1-NEXT: [[ADD26:%.*]] = add nsw i32 [[CONV24]], [[CONV25]] +// CHECK1-NEXT: [[CONV27:%.*]] = trunc i32 [[ADD26]] to i8 +// CHECK1-NEXT: store i8 [[CONV27]], i8* [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP99:%.*]] = load i8, i8* [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP100:%.*]] = cmpxchg i8* [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i8 [[TMP96]], i8 [[TMP99]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP101]] = extractvalue { i8, i1 } [[TMP100]], 0 +// CHECK1-NEXT: [[TMP102:%.*]] = extractvalue { i8, i1 } [[TMP100]], 1 +// CHECK1-NEXT: br i1 [[TMP102]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT29]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT30]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE31:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT29]], [[TMP91]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_DONE32]], label [[OMP_ARRAYCPY_BODY20]] -// CHECK1: omp.arraycpy.done32: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT28]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT29]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST20]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE30:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT28]], [[TMP94]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE30]], label [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_BODY19]] +// CHECK1: omp.arraycpy.done31: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP100:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP100]]) +// CHECK1-NEXT: [[TMP103:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP103]]) // CHECK1-NEXT: ret void // // @@ -393,7 +399,7 @@ // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[TMP_I:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[TMP4_I:%.*]] = alloca i8*, align 8 @@ -407,7 +413,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -420,29 +426,29 @@ // CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* // CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* // CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP24:%.*]] = load i8**, i8*** [[TMP23]], align 8 // CHECK1-NEXT: [[TMP25:%.*]] = load i8*, i8** [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP26]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 // CHECK1-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP29]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP31:%.*]] = load i8**, i8*** [[TMP30]], align 8 // CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds i8*, i8** [[TMP31]], i64 9 // CHECK1-NEXT: [[TMP32:%.*]] = load i8*, i8** [[ARRAYIDX2_I]], align 8 @@ -453,10 +459,10 @@ // CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 // CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8 +// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8, !noalias !12 // CHECK1-NEXT: [[TMP39:%.*]] = load i8*, i8** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP40:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP39]], i8* [[TMP25]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP42:%.*]] = load i8**, i8*** [[TMP41]], align 8 // CHECK1-NEXT: [[TMP43:%.*]] = load i8*, i8** [[TMP42]], align 8 // CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint i8* [[TMP43]] to i64 diff --git a/clang/test/OpenMP/target_teams_codegen.cpp b/clang/test/OpenMP/target_teams_codegen.cpp --- a/clang/test/OpenMP/target_teams_codegen.cpp +++ b/clang/test/OpenMP/target_teams_codegen.cpp @@ -322,7 +322,7 @@ // CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_CASTED7:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS9:%.*]] = alloca [1 x i8*], align 8 @@ -404,13 +404,13 @@ // CHECK1-NEXT: store i8* null, i8** [[TMP29]], align 8 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 0 // CHECK1-NEXT: [[TMP33:%.*]] = load i16, i16* [[AA]], align 2 // CHECK1-NEXT: store i16 [[TMP33]], i16* [[TMP32]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 1 // CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: store i32 [[TMP35]], i32* [[TMP34]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 2 // CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 [[TMP37]], i32* [[TMP36]], align 4 // CHECK1-NEXT: [[TMP38:%.*]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 120, i64 12, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*), i64 -1) @@ -418,10 +418,10 @@ // CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP39]], i32 0, i32 0 // CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP40]], i32 0, i32 0 // CHECK1-NEXT: [[TMP42:%.*]] = load i8*, i8** [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* +// CHECK1-NEXT: [[TMP43:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP42]], i8* align 4 [[TMP43]], i64 12, i1 false) // CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP39]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP42]] to %struct.anon* +// CHECK1-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP42]] to %struct.anon.0* // CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP44]], i32 0, i32 0 // CHECK1-NEXT: [[TMP47:%.*]] = bitcast [3 x i8*]* [[TMP46]] to i8* // CHECK1-NEXT: [[TMP48:%.*]] = bitcast i8** [[TMP30]] to i8* @@ -664,7 +664,7 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -675,24 +675,27 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP4]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 2 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK1-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 // CHECK1-NEXT: ret void // // @@ -733,7 +736,7 @@ // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i16*, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca [3 x i8*]*, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca [3 x i8*]*, align 8 @@ -751,7 +754,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -764,8 +767,8 @@ // CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !24 // CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !24 // CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !24 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 // CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !24 // CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !24 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @@ -777,8 +780,8 @@ // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i64 0, i64 0 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP18]], i64 0, i64 0 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP12]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP23]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP24]], align 4 // CHECK1-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* [[TMP22]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 [[TMP25]], i32 [[TMP26]], i32 0, i8* null, i32 0, i8* null) #[[ATTR3]] @@ -807,30 +810,33 @@ // CHECK1-SAME: (i64 noundef [[A:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[A]], align 4 // CHECK1-NEXT: ret void // // @@ -838,32 +844,35 @@ // CHECK1-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 2 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK1-NEXT: store i16 [[TMP1]], i16* [[TMP0]], align 2 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK1-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK1-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK1-NEXT: store i16 [[CONV1]], i16* [[AA]], align 2 // CHECK1-NEXT: ret void // // @@ -872,45 +881,47 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK1-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 +// CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK1-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 // CHECK1-NEXT: ret void // // @@ -926,7 +937,7 @@ // CHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -945,84 +956,95 @@ // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 9, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, [10 x float]*, i64, float*, [5 x [10 x double]]*, i64, i64, double*, %struct.TT*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], [10 x float]* [[TMP0]], i64 [[TMP1]], float* [[TMP2]], [5 x [10 x double]]* [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], double* [[TMP6]], %struct.TT* [[TMP7]]) +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store [10 x float]* [[TMP0]], [10 x float]** [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[TMP2]], float** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store [5 x [10 x double]]* [[TMP3]], [5 x [10 x double]]** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: store i64 [[TMP5]], i64* [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK1-NEXT: store double* [[TMP6]], double** [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK1-NEXT: store %struct.TT* [[TMP7]], %struct.TT** [[TMP17]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8 -// CHECK1-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8 -// CHECK1-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double -// CHECK1-NEXT: [[ADD6:%.*]] = fadd double [[CONV5]], 1.000000e+00 -// CHECK1-NEXT: [[CONV7:%.*]] = fptrunc double [[ADD6]] to float -// CHECK1-NEXT: store float [[CONV7]], float* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK1-NEXT: [[TMP10:%.*]] = load float, float* [[ARRAYIDX8]], align 4 -// CHECK1-NEXT: [[CONV9:%.*]] = fpext float [[TMP10]] to double -// CHECK1-NEXT: [[ADD10:%.*]] = fadd double [[CONV9]], 1.000000e+00 -// CHECK1-NEXT: [[CONV11:%.*]] = fptrunc double [[ADD10]] to float -// CHECK1-NEXT: store float [[CONV11]], float* [[ARRAYIDX8]], align 4 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX12]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP11:%.*]] = load double, double* [[ARRAYIDX13]], align 8 -// CHECK1-NEXT: [[ADD14:%.*]] = fadd double [[TMP11]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD14]], double* [[ARRAYIDX13]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP12]] -// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3 -// CHECK1-NEXT: [[TMP13:%.*]] = load double, double* [[ARRAYIDX16]], align 8 -// CHECK1-NEXT: [[ADD17:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD17]], double* [[ARRAYIDX16]], align 8 -// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[X]], align 8 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i64 [[TMP14]], 1 -// CHECK1-NEXT: store i64 [[ADD18]], i64* [[X]], align 8 -// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP15:%.*]] = load i8, i8* [[Y]], align 8 -// CHECK1-NEXT: [[CONV19:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[CONV19]], 1 -// CHECK1-NEXT: [[CONV21:%.*]] = trunc i32 [[ADD20]] to i8 -// CHECK1-NEXT: store i8 [[CONV21]], i8* [[Y]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load [10 x float]*, [10 x float]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP16:%.*]] = load double*, double** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load %struct.TT*, %struct.TT** [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double +// CHECK1-NEXT: [[ADD1:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK1-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD1]] to float +// CHECK1-NEXT: store float [[CONV2]], float* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 3 +// CHECK1-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK1-NEXT: [[CONV4:%.*]] = fpext float [[TMP21]] to double +// CHECK1-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.000000e+00 +// CHECK1-NEXT: [[CONV6:%.*]] = fptrunc double [[ADD5]] to float +// CHECK1-NEXT: store float [[CONV6]], float* [[ARRAYIDX3]], align 4 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP10]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX7]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX8]], align 8 +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD9]], double* [[ARRAYIDX8]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP16]], i64 [[TMP23]] +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX10]], i64 3 +// CHECK1-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX11]], align 8 +// CHECK1-NEXT: [[ADD12:%.*]] = fadd double [[TMP24]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD12]], double* [[ARRAYIDX11]], align 8 +// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP25]], 1 +// CHECK1-NEXT: store i64 [[ADD13]], i64* [[X]], align 8 +// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8 +// CHECK1-NEXT: [[CONV14:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 +// CHECK1-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i8 +// CHECK1-NEXT: store i8 [[CONV16]], i8* [[Y]], align 8 // CHECK1-NEXT: ret void // // @@ -1030,46 +1052,50 @@ // CHECK1-SAME: (i64 noundef [[NN:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[NN_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK1-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32 0, i32* [[TMP0]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[NN_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 +// CHECK1-NEXT: [[NN:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK1-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[NN]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[NN_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 +// CHECK1-NEXT: [[NN:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* +// CHECK1-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[NN]], align 4 // CHECK1-NEXT: ret void // // @@ -1077,41 +1103,48 @@ // CHECK1-SAME: (i64 noundef [[NN:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[NN_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK1-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32 0, i32* [[TMP0]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[NN_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 +// CHECK1-NEXT: [[NN:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK1-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[NN]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[NN]], i32** [[TMP3]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..17 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[NN:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[NN_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[NN]], i32** [[NN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[NN_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 // CHECK1-NEXT: ret void // // @@ -1151,23 +1184,28 @@ // CHECK1-SAME: (i64 noundef [[VLA:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..20 to void (i32*, i32*, ...)*), i64 [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP0]], i64* [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..20 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..20 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 // CHECK1-NEXT: [[F:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 // CHECK1-NEXT: ret void // // @@ -1472,7 +1510,7 @@ // CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -1483,50 +1521,57 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.S1* [[TMP0]], %struct.S1** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store i16* [[TMP3]], i16** [[TMP9]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP4]] to double -// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i16*, i16** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[B]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP11]] to double +// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double [[ADD]], double* [[A]], align 8 -// CHECK1-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = load double, double* [[A4]], align 8 -// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 -// CHECK1-NEXT: store double [[INC]], double* [[A4]], align 8 -// CHECK1-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK1-NEXT: [[TMP6:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP6]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 -// CHECK1-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2 +// CHECK1-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP12:%.*]] = load double, double* [[A1]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP12]], 1.000000e+00 +// CHECK1-NEXT: store double [[INC]], double* [[A1]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = fptosi double [[INC]] to i16 +// CHECK1-NEXT: [[TMP13:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i64 [[TMP13]] +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// CHECK1-NEXT: store i16 [[CONV2]], i16* [[ARRAYIDX3]], align 2 // CHECK1-NEXT: ret void // // @@ -1537,9 +1582,7 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 @@ -1548,58 +1591,62 @@ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 -// CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* -// CHECK1-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV2]], align 1 +// CHECK1-NEXT: store i8 [[TMP6]], i8* [[TMP5]], align 2 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP7]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.11*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 -// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK1-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], %struct.anon.11* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 2 +// CHECK1-NEXT: store i8 [[TMP6]], i8* [[AAA]], align 1 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK1-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[AAA]], align 1 +// CHECK1-NEXT: [[CONV3:%.*]] = sext i8 [[TMP11]] to i32 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 -// CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK1-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 -// CHECK1-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] to i32 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 -// CHECK1-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK1-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 +// CHECK1-NEXT: store i8 [[CONV5]], i8* [[AAA]], align 1 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP8]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: ret void // // @@ -1609,54 +1656,57 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP5]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.12*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..29 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK1-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: ret void // // @@ -1687,7 +1737,7 @@ // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_CASTED4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS6:%.*]] = alloca [1 x i8*], align 4 @@ -1765,13 +1815,13 @@ // CHECK3-NEXT: store i8* null, i8** [[TMP27]], align 4 // CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 0 // CHECK3-NEXT: [[TMP31:%.*]] = load i16, i16* [[AA]], align 2 // CHECK3-NEXT: store i16 [[TMP31]], i16* [[TMP30]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 1 // CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: store i32 [[TMP33]], i32* [[TMP32]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 2 // CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 [[TMP35]], i32* [[TMP34]], align 4 // CHECK3-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 72, i32 12, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*), i64 -1) @@ -1779,10 +1829,10 @@ // CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0 // CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP38]], i32 0, i32 0 // CHECK3-NEXT: [[TMP40:%.*]] = load i8*, i8** [[TMP39]], align 4 -// CHECK3-NEXT: [[TMP41:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* +// CHECK3-NEXT: [[TMP41:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8* // CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP40]], i8* align 4 [[TMP41]], i32 12, i1 false) // CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP43:%.*]] = bitcast i8* [[TMP40]] to %struct.anon* +// CHECK3-NEXT: [[TMP43:%.*]] = bitcast i8* [[TMP40]] to %struct.anon.0* // CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP42]], i32 0, i32 0 // CHECK3-NEXT: [[TMP45:%.*]] = bitcast [3 x i64]* [[TMP44]] to i8* // CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP45]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false) @@ -2022,7 +2072,7 @@ // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -2031,24 +2081,27 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined. to void (i32*, i32*, ...)*), i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 2 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK3-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 // CHECK3-NEXT: ret void // // @@ -2089,7 +2142,7 @@ // CHECK3-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 4 // CHECK3-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 4 // CHECK3-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i16*, align 4 // CHECK3-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca [3 x i8*]*, align 4 // CHECK3-NEXT: [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca [3 x i8*]*, align 4 @@ -2107,7 +2160,7 @@ // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK3-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -2120,8 +2173,8 @@ // CHECK3-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !25 // CHECK3-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !25 // CHECK3-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !25 -// CHECK3-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !25 -// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !25 +// CHECK3-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 4, !noalias !25 +// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 4, !noalias !25 // CHECK3-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !25 // CHECK3-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !25 // CHECK3-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @@ -2133,8 +2186,8 @@ // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i32 0, i32 0 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP18]], i32 0, i32 0 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP12]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 2 // CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP23]], align 4 // CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP24]], align 4 // CHECK3-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* [[TMP22]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 [[TMP25]], i32 [[TMP26]], i32 0, i8* null, i32 0, i8* null) #[[ATTR3]] @@ -2161,27 +2214,32 @@ // CHECK3-SAME: (i32 noundef [[A:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[A]], align 4 // CHECK3-NEXT: ret void // // @@ -2189,32 +2247,35 @@ // CHECK3-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 2 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK3-NEXT: store i16 [[TMP1]], i16* [[TMP0]], align 2 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 -// CHECK3-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK3-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK3-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK3-NEXT: store i16 [[CONV1]], i16* [[AA]], align 2 // CHECK3-NEXT: ret void // // @@ -2223,42 +2284,46 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP1]], i32 [[TMP3]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK3-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 -// CHECK3-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK3-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 +// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK3-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK3-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 // CHECK3-NEXT: ret void // // @@ -2274,7 +2339,7 @@ // CHECK3-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -2292,82 +2357,95 @@ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 9, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [10 x float]*, i32, float*, [5 x [10 x double]]*, i32, i32, double*, %struct.TT*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP9]], [10 x float]* [[TMP0]], i32 [[TMP1]], float* [[TMP2]], [5 x [10 x double]]* [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], double* [[TMP6]], %struct.TT* [[TMP7]]) +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store [10 x float]* [[TMP0]], [10 x float]** [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[TMP2]], float** [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store [5 x [10 x double]]* [[TMP3]], [5 x [10 x double]]** [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK3-NEXT: store double* [[TMP6]], double** [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK3-NEXT: store %struct.TT* [[TMP7]], %struct.TT** [[TMP17]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4 -// CHECK3-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4 -// CHECK3-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP9]] to double -// CHECK3-NEXT: [[ADD5:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK3-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load [10 x float]*, [10 x float]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 7 +// CHECK3-NEXT: [[TMP16:%.*]] = load double*, double** [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load %struct.TT*, %struct.TT** [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double +// CHECK3-NEXT: [[ADD1:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK3-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD1]] to float +// CHECK3-NEXT: store float [[CONV2]], float* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP8]], i32 3 +// CHECK3-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK3-NEXT: [[CONV4:%.*]] = fpext float [[TMP21]] to double +// CHECK3-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.000000e+00 // CHECK3-NEXT: [[CONV6:%.*]] = fptrunc double [[ADD5]] to float -// CHECK3-NEXT: store float [[CONV6]], float* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK3-NEXT: [[TMP10:%.*]] = load float, float* [[ARRAYIDX7]], align 4 -// CHECK3-NEXT: [[CONV8:%.*]] = fpext float [[TMP10]] to double -// CHECK3-NEXT: [[ADD9:%.*]] = fadd double [[CONV8]], 1.000000e+00 -// CHECK3-NEXT: [[CONV10:%.*]] = fptrunc double [[ADD9]] to float -// CHECK3-NEXT: store float [[CONV10]], float* [[ARRAYIDX7]], align 4 -// CHECK3-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX11]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP11:%.*]] = load double, double* [[ARRAYIDX12]], align 8 -// CHECK3-NEXT: [[ADD13:%.*]] = fadd double [[TMP11]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD13]], double* [[ARRAYIDX12]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP12]] -// CHECK3-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX14]], i32 3 -// CHECK3-NEXT: [[TMP13:%.*]] = load double, double* [[ARRAYIDX15]], align 8 -// CHECK3-NEXT: [[ADD16:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8 -// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP14:%.*]] = load i64, i64* [[X]], align 4 -// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP14]], 1 -// CHECK3-NEXT: store i64 [[ADD17]], i64* [[X]], align 4 -// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP15:%.*]] = load i8, i8* [[Y]], align 4 -// CHECK3-NEXT: [[CONV18:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK3-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV18]], 1 -// CHECK3-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 -// CHECK3-NEXT: store i8 [[CONV20]], i8* [[Y]], align 4 +// CHECK3-NEXT: store float [[CONV6]], float* [[ARRAYIDX3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP10]], i32 0, i32 1 +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX7]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX8]], align 8 +// CHECK3-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD9]], double* [[ARRAYIDX8]], align 8 +// CHECK3-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP16]], i32 [[TMP23]] +// CHECK3-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX10]], i32 3 +// CHECK3-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX11]], align 8 +// CHECK3-NEXT: [[ADD12:%.*]] = fadd double [[TMP24]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD12]], double* [[ARRAYIDX11]], align 8 +// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP18]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4 +// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP25]], 1 +// CHECK3-NEXT: store i64 [[ADD13]], i64* [[X]], align 4 +// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP18]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4 +// CHECK3-NEXT: [[CONV14:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK3-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 +// CHECK3-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i8 +// CHECK3-NEXT: store i8 [[CONV16]], i8* [[Y]], align 4 // CHECK3-NEXT: ret void // // @@ -2375,41 +2453,49 @@ // CHECK3-SAME: (i32 noundef [[NN:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[NN_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[NN_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store i32 [[NN]], i32* [[NN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[NN_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], i32* [[NN_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[NN_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32 0, i32* [[TMP0]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[NN_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[NN_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 +// CHECK3-NEXT: [[NN:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[NN]], i32* [[NN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[NN_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], i32* [[NN_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[NN_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32 [[TMP1]]) +// CHECK3-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[NN]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32 0, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[NN_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 +// CHECK3-NEXT: [[NN:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[NN]], i32* [[NN_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[NN]], align 4 // CHECK3-NEXT: ret void // // @@ -2417,38 +2503,47 @@ // CHECK3-SAME: (i32 noundef [[NN:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[NN_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[NN_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK3-NEXT: store i32 [[NN]], i32* [[NN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[NN_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], i32* [[NN_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[NN_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32 0, i32* [[TMP0]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[NN_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 4 +// CHECK3-NEXT: [[NN:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[NN]], i32* [[NN_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i32* [[NN_ADDR]]) +// CHECK3-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[NN]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[NN]], i32** [[TMP3]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..17 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[NN:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[NN_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[NN]], i32** [[NN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[NN_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 // CHECK3-NEXT: ret void // // @@ -2487,23 +2582,28 @@ // CHECK3-SAME: (i32 noundef [[VLA:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK3-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..20 to void (i32*, i32*, ...)*), i32 [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP0]], i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..20 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..20 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[VLA:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 4 // CHECK3-NEXT: [[F:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK3-NEXT: ret void // // @@ -2805,7 +2905,7 @@ // CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -2815,48 +2915,57 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.S1* [[TMP0]], %struct.S1** [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store i16* [[TMP3]], i16** [[TMP9]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to double +// CHECK3-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i16*, i16** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[B]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP11]] to double // CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double [[ADD]], double* [[A]], align 4 -// CHECK3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = load double, double* [[A3]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 -// CHECK3-NEXT: store double [[INC]], double* [[A3]], align 4 -// CHECK3-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 -// CHECK3-NEXT: [[TMP6:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP6]] -// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 -// CHECK3-NEXT: store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2 +// CHECK3-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP12:%.*]] = load double, double* [[A1]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP12]], 1.000000e+00 +// CHECK3-NEXT: store double [[INC]], double* [[A1]], align 4 +// CHECK3-NEXT: [[CONV2:%.*]] = fptosi double [[INC]] to i16 +// CHECK3-NEXT: [[TMP13:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i32 [[TMP13]] +// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// CHECK3-NEXT: store i16 [[CONV2]], i16* [[ARRAYIDX3]], align 2 // CHECK3-NEXT: ret void // // @@ -2867,9 +2976,7 @@ // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 @@ -2877,56 +2984,62 @@ // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* // CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 -// CHECK3-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* -// CHECK3-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV1]], align 1 +// CHECK3-NEXT: store i8 [[TMP6]], i8* [[TMP5]], align 2 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP7]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.11*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 -// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 -// CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK3-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 -// CHECK3-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 -// CHECK3-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 -// CHECK3-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK3-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], %struct.anon.11* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 2 +// CHECK3-NEXT: store i8 [[TMP6]], i8* [[AAA]], align 1 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK3-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK3-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 +// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[AAA]], align 1 +// CHECK3-NEXT: [[CONV3:%.*]] = sext i8 [[TMP11]] to i32 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 +// CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 +// CHECK3-NEXT: store i8 [[CONV5]], i8* [[AAA]], align 1 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP8]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: ret void // // @@ -2936,51 +3049,56 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP5]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.12*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..29 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 -// CHECK3-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK3-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK3-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK3-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: ret void // // @@ -2997,7 +3115,7 @@ // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -3008,24 +3126,27 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK9-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP4]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK9-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 2 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK9-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 // CHECK9-NEXT: ret void // // @@ -3033,32 +3154,35 @@ // CHECK9-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK9-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK9-NEXT: store i16 [[TMP1]], i16* [[TMP0]], align 2 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK9-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK9-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK9-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK9-NEXT: store i16 [[CONV1]], i16* [[AA]], align 2 // CHECK9-NEXT: ret void // // @@ -3067,45 +3191,47 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK9-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP3]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK9-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK9-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 +// CHECK9-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK9-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 // CHECK9-NEXT: ret void // // @@ -3121,7 +3247,7 @@ // CHECK9-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 // CHECK9-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK9-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -3140,84 +3266,95 @@ // CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK9-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 9, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, [10 x float]*, i64, float*, [5 x [10 x double]]*, i64, i64, double*, %struct.TT*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], [10 x float]* [[TMP0]], i64 [[TMP1]], float* [[TMP2]], [5 x [10 x double]]* [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], double* [[TMP6]], %struct.TT* [[TMP7]]) +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store [10 x float]* [[TMP0]], [10 x float]** [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], i64* [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store float* [[TMP2]], float** [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store [5 x [10 x double]]* [[TMP3]], [5 x [10 x double]]** [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: store i64 [[TMP5]], i64* [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK9-NEXT: store double* [[TMP6]], double** [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK9-NEXT: store %struct.TT* [[TMP7]], %struct.TT** [[TMP17]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 -// CHECK9-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK9-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8 -// CHECK9-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8 -// CHECK9-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8 -// CHECK9-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double -// CHECK9-NEXT: [[ADD6:%.*]] = fadd double [[CONV5]], 1.000000e+00 -// CHECK9-NEXT: [[CONV7:%.*]] = fptrunc double [[ADD6]] to float -// CHECK9-NEXT: store float [[CONV7]], float* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK9-NEXT: [[TMP10:%.*]] = load float, float* [[ARRAYIDX8]], align 4 -// CHECK9-NEXT: [[CONV9:%.*]] = fpext float [[TMP10]] to double -// CHECK9-NEXT: [[ADD10:%.*]] = fadd double [[CONV9]], 1.000000e+00 -// CHECK9-NEXT: [[CONV11:%.*]] = fptrunc double [[ADD10]] to float -// CHECK9-NEXT: store float [[CONV11]], float* [[ARRAYIDX8]], align 4 -// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 -// CHECK9-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX12]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP11:%.*]] = load double, double* [[ARRAYIDX13]], align 8 -// CHECK9-NEXT: [[ADD14:%.*]] = fadd double [[TMP11]], 1.000000e+00 -// CHECK9-NEXT: store double [[ADD14]], double* [[ARRAYIDX13]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK9-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP12]] -// CHECK9-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3 -// CHECK9-NEXT: [[TMP13:%.*]] = load double, double* [[ARRAYIDX16]], align 8 -// CHECK9-NEXT: [[ADD17:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK9-NEXT: store double [[ADD17]], double* [[ARRAYIDX16]], align 8 -// CHECK9-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[X]], align 8 -// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i64 [[TMP14]], 1 -// CHECK9-NEXT: store i64 [[ADD18]], i64* [[X]], align 8 -// CHECK9-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP15:%.*]] = load i8, i8* [[Y]], align 8 -// CHECK9-NEXT: [[CONV19:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK9-NEXT: [[ADD20:%.*]] = add nsw i32 [[CONV19]], 1 -// CHECK9-NEXT: [[CONV21:%.*]] = trunc i32 [[ADD20]] to i8 -// CHECK9-NEXT: store i8 [[CONV21]], i8* [[Y]], align 8 +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load [10 x float]*, [10 x float]** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 7 +// CHECK9-NEXT: [[TMP16:%.*]] = load double*, double** [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load %struct.TT*, %struct.TT** [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double +// CHECK9-NEXT: [[ADD1:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK9-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD1]] to float +// CHECK9-NEXT: store float [[CONV2]], float* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 3 +// CHECK9-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK9-NEXT: [[CONV4:%.*]] = fpext float [[TMP21]] to double +// CHECK9-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.000000e+00 +// CHECK9-NEXT: [[CONV6:%.*]] = fptrunc double [[ADD5]] to float +// CHECK9-NEXT: store float [[CONV6]], float* [[ARRAYIDX3]], align 4 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP10]], i64 0, i64 1 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX7]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX8]], align 8 +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], 1.000000e+00 +// CHECK9-NEXT: store double [[ADD9]], double* [[ARRAYIDX8]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP16]], i64 [[TMP23]] +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX10]], i64 3 +// CHECK9-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX11]], align 8 +// CHECK9-NEXT: [[ADD12:%.*]] = fadd double [[TMP24]], 1.000000e+00 +// CHECK9-NEXT: store double [[ADD12]], double* [[ARRAYIDX11]], align 8 +// CHECK9-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP18]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8 +// CHECK9-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP25]], 1 +// CHECK9-NEXT: store i64 [[ADD13]], i64* [[X]], align 8 +// CHECK9-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP18]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8 +// CHECK9-NEXT: [[CONV14:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK9-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 +// CHECK9-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i8 +// CHECK9-NEXT: store i8 [[CONV16]], i8* [[Y]], align 8 // CHECK9-NEXT: ret void // // @@ -3225,46 +3362,50 @@ // CHECK9-SAME: (i64 noundef [[NN:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[NN_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK9-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32 0, i32* [[TMP0]], align 4 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[NN_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK9-NEXT: [[NN:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK9-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[NN]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32 0, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[NN_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK9-NEXT: [[NN:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* +// CHECK9-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[NN]], align 4 // CHECK9-NEXT: ret void // // @@ -3272,41 +3413,48 @@ // CHECK9-SAME: (i64 noundef [[NN:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[NN_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK9-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32 0, i32* [[TMP0]], align 4 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[NN_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 +// CHECK9-NEXT: [[NN:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK9-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[NN]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[NN]], i32** [[TMP3]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[NN:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[NN_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[NN]], i32** [[NN_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[NN_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 // CHECK9-NEXT: ret void // // @@ -3314,23 +3462,28 @@ // CHECK9-SAME: (i64 noundef [[VLA:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i64 [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP1]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK9-NEXT: [[F:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 // CHECK9-NEXT: ret void // // @@ -3341,9 +3494,7 @@ // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 @@ -3352,58 +3503,62 @@ // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK9-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 -// CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* -// CHECK9-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK9-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV2]], align 1 +// CHECK9-NEXT: store i8 [[TMP6]], i8* [[TMP5]], align 2 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP7]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK9-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK9-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 2 +// CHECK9-NEXT: store i8 [[TMP6]], i8* [[AAA]], align 1 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK9-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK9-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[AAA]], align 1 +// CHECK9-NEXT: [[CONV3:%.*]] = sext i8 [[TMP11]] to i32 // CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 -// CHECK9-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK9-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 -// CHECK9-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] to i32 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK9-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK9-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 +// CHECK9-NEXT: store i8 [[CONV5]], i8* [[AAA]], align 1 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP8]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: ret void // // @@ -3415,7 +3570,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK9-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -3426,50 +3581,57 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]]) +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store %struct.S1* [[TMP0]], %struct.S1** [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], i64* [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP2]], i64* [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i16* [[TMP3]], i16** [[TMP9]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 +// CHECK9-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP4]] to double -// CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i16*, i16** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[B]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP11]] to double +// CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK9-NEXT: store double [[ADD]], double* [[A]], align 8 -// CHECK9-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP5:%.*]] = load double, double* [[A4]], align 8 -// CHECK9-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 -// CHECK9-NEXT: store double [[INC]], double* [[A4]], align 8 -// CHECK9-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK9-NEXT: [[TMP6:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP6]] -// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 -// CHECK9-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2 +// CHECK9-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP12:%.*]] = load double, double* [[A1]], align 8 +// CHECK9-NEXT: [[INC:%.*]] = fadd double [[TMP12]], 1.000000e+00 +// CHECK9-NEXT: store double [[INC]], double* [[A1]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = fptosi double [[INC]] to i16 +// CHECK9-NEXT: [[TMP13:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i64 [[TMP13]] +// CHECK9-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// CHECK9-NEXT: store i16 [[CONV2]], i16* [[ARRAYIDX3]], align 2 // CHECK9-NEXT: ret void // // @@ -3479,54 +3641,57 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK9-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK9-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP5]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK9-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 -// CHECK9-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// CHECK9-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK9-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: ret void // // @@ -3536,7 +3701,7 @@ // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -3545,24 +3710,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK11-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined. to void (i32*, i32*, ...)*), i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK11-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 2 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK11-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 // CHECK11-NEXT: ret void // // @@ -3570,32 +3738,35 @@ // CHECK11-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK11-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP1]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK11-NEXT: store i16 [[TMP1]], i16* [[TMP0]], align 2 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 -// CHECK11-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK11-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK11-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK11-NEXT: store i16 [[CONV1]], i16* [[AA]], align 2 // CHECK11-NEXT: ret void // // @@ -3604,42 +3775,46 @@ // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK11-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32 [[TMP1]], i32 [[TMP3]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK11-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK11-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 -// CHECK11-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK11-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 +// CHECK11-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK11-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK11-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 // CHECK11-NEXT: ret void // // @@ -3655,7 +3830,7 @@ // CHECK11-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 // CHECK11-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -3673,82 +3848,95 @@ // CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 9, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [10 x float]*, i32, float*, [5 x [10 x double]]*, i32, i32, double*, %struct.TT*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP9]], [10 x float]* [[TMP0]], i32 [[TMP1]], float* [[TMP2]], [5 x [10 x double]]* [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], double* [[TMP6]], %struct.TT* [[TMP7]]) +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store [10 x float]* [[TMP0]], [10 x float]** [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP1]], i32* [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store float* [[TMP2]], float** [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store [5 x [10 x double]]* [[TMP3]], [5 x [10 x double]]** [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK11-NEXT: store double* [[TMP6]], double** [[TMP16]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK11-NEXT: store %struct.TT* [[TMP7]], %struct.TT** [[TMP17]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 -// CHECK11-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4 -// CHECK11-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4 -// CHECK11-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4 -// CHECK11-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK11-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = fpext float [[TMP9]] to double -// CHECK11-NEXT: [[ADD5:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load [10 x float]*, [10 x float]** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 7 +// CHECK11-NEXT: [[TMP16:%.*]] = load double*, double** [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load %struct.TT*, %struct.TT** [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double +// CHECK11-NEXT: [[ADD1:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK11-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD1]] to float +// CHECK11-NEXT: store float [[CONV2]], float* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP8]], i32 3 +// CHECK11-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK11-NEXT: [[CONV4:%.*]] = fpext float [[TMP21]] to double +// CHECK11-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.000000e+00 // CHECK11-NEXT: [[CONV6:%.*]] = fptrunc double [[ADD5]] to float -// CHECK11-NEXT: store float [[CONV6]], float* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK11-NEXT: [[TMP10:%.*]] = load float, float* [[ARRAYIDX7]], align 4 -// CHECK11-NEXT: [[CONV8:%.*]] = fpext float [[TMP10]] to double -// CHECK11-NEXT: [[ADD9:%.*]] = fadd double [[CONV8]], 1.000000e+00 -// CHECK11-NEXT: [[CONV10:%.*]] = fptrunc double [[ADD9]] to float -// CHECK11-NEXT: store float [[CONV10]], float* [[ARRAYIDX7]], align 4 -// CHECK11-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 -// CHECK11-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX11]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP11:%.*]] = load double, double* [[ARRAYIDX12]], align 8 -// CHECK11-NEXT: [[ADD13:%.*]] = fadd double [[TMP11]], 1.000000e+00 -// CHECK11-NEXT: store double [[ADD13]], double* [[ARRAYIDX12]], align 8 -// CHECK11-NEXT: [[TMP12:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK11-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP12]] -// CHECK11-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX14]], i32 3 -// CHECK11-NEXT: [[TMP13:%.*]] = load double, double* [[ARRAYIDX15]], align 8 -// CHECK11-NEXT: [[ADD16:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK11-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8 -// CHECK11-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, i64* [[X]], align 4 -// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP14]], 1 -// CHECK11-NEXT: store i64 [[ADD17]], i64* [[X]], align 4 -// CHECK11-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK11-NEXT: [[TMP15:%.*]] = load i8, i8* [[Y]], align 4 -// CHECK11-NEXT: [[CONV18:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK11-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV18]], 1 -// CHECK11-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 -// CHECK11-NEXT: store i8 [[CONV20]], i8* [[Y]], align 4 +// CHECK11-NEXT: store float [[CONV6]], float* [[ARRAYIDX3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP10]], i32 0, i32 1 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX7]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX8]], align 8 +// CHECK11-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], 1.000000e+00 +// CHECK11-NEXT: store double [[ADD9]], double* [[ARRAYIDX8]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP16]], i32 [[TMP23]] +// CHECK11-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX10]], i32 3 +// CHECK11-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX11]], align 8 +// CHECK11-NEXT: [[ADD12:%.*]] = fadd double [[TMP24]], 1.000000e+00 +// CHECK11-NEXT: store double [[ADD12]], double* [[ARRAYIDX11]], align 8 +// CHECK11-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP18]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4 +// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP25]], 1 +// CHECK11-NEXT: store i64 [[ADD13]], i64* [[X]], align 4 +// CHECK11-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP18]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4 +// CHECK11-NEXT: [[CONV14:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK11-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 +// CHECK11-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i8 +// CHECK11-NEXT: store i8 [[CONV16]], i8* [[Y]], align 4 // CHECK11-NEXT: ret void // // @@ -3756,41 +3944,49 @@ // CHECK11-SAME: (i32 noundef [[NN:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[NN_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[NN_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32 [[NN]], i32* [[NN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[NN_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], i32* [[NN_CASTED]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[NN_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32 [[TMP1]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32 0, i32* [[TMP0]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[NN_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[NN_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK11-NEXT: [[NN:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[NN]], i32* [[NN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[NN_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], i32* [[NN_CASTED]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[NN_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP1]]) +// CHECK11-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[NN]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32 0, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[NN_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK11-NEXT: [[NN:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[NN]], i32* [[NN_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[NN]], align 4 // CHECK11-NEXT: ret void // // @@ -3798,38 +3994,47 @@ // CHECK11-SAME: (i32 noundef [[NN:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[NN_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[NN_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK11-NEXT: store i32 [[NN]], i32* [[NN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[NN_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], i32* [[NN_CASTED]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[NN_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP1]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32 0, i32* [[TMP0]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[NN_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 +// CHECK11-NEXT: [[NN:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[NN]], i32* [[NN_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* [[NN_ADDR]]) +// CHECK11-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[NN]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[NN]], i32** [[TMP3]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[NN:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[NN_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[NN]], i32** [[NN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[NN_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 // CHECK11-NEXT: ret void // // @@ -3837,23 +4042,28 @@ // CHECK11-SAME: (i32 noundef [[VLA:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32 [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP1]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[VLA:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 4 // CHECK11-NEXT: [[F:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK11-NEXT: ret void // // @@ -3864,9 +4074,7 @@ // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 @@ -3874,56 +4082,62 @@ // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK11-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 -// CHECK11-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* -// CHECK11-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK11-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV1]], align 1 +// CHECK11-NEXT: store i8 [[TMP6]], i8* [[TMP5]], align 2 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP7]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK11-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK11-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 -// CHECK11-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK11-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 -// CHECK11-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 -// CHECK11-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 -// CHECK11-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK11-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK11-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 2 +// CHECK11-NEXT: store i8 [[TMP6]], i8* [[AAA]], align 1 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK11-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK11-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK11-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 +// CHECK11-NEXT: [[TMP11:%.*]] = load i8, i8* [[AAA]], align 1 +// CHECK11-NEXT: [[CONV3:%.*]] = sext i8 [[TMP11]] to i32 +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 +// CHECK11-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 +// CHECK11-NEXT: store i8 [[CONV5]], i8* [[AAA]], align 1 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP8]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: ret void // // @@ -3935,7 +4149,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK11-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -3945,48 +4159,57 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]]) +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store %struct.S1* [[TMP0]], %struct.S1** [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP1]], i32* [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i16* [[TMP3]], i16** [[TMP9]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to double +// CHECK11-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i16*, i16** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[B]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP11]] to double // CHECK11-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK11-NEXT: store double [[ADD]], double* [[A]], align 4 -// CHECK11-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP5:%.*]] = load double, double* [[A3]], align 4 -// CHECK11-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 -// CHECK11-NEXT: store double [[INC]], double* [[A3]], align 4 -// CHECK11-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 -// CHECK11-NEXT: [[TMP6:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP6]] -// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 -// CHECK11-NEXT: store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2 +// CHECK11-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP12:%.*]] = load double, double* [[A1]], align 4 +// CHECK11-NEXT: [[INC:%.*]] = fadd double [[TMP12]], 1.000000e+00 +// CHECK11-NEXT: store double [[INC]], double* [[A1]], align 4 +// CHECK11-NEXT: [[CONV2:%.*]] = fptosi double [[INC]] to i16 +// CHECK11-NEXT: [[TMP13:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i32 [[TMP13]] +// CHECK11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// CHECK11-NEXT: store i16 [[CONV2]], i16* [[ARRAYIDX3]], align 2 // CHECK11-NEXT: ret void // // @@ -3996,50 +4219,55 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK11-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK11-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK11-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP5]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK11-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 -// CHECK11-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK11-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 -// CHECK11-NEXT: store i32 [[ADD4]], i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// CHECK11-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK11-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK11-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: ret void // diff --git a/clang/test/OpenMP/target_teams_distribute_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_codegen.cpp @@ -314,7 +314,7 @@ // CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_CASTED7:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS9:%.*]] = alloca [1 x i8*], align 8 @@ -392,13 +392,13 @@ // CHECK1-NEXT: store i8* null, i8** [[TMP29]], align 8 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 0 // CHECK1-NEXT: [[TMP33:%.*]] = load i16, i16* [[AA]], align 2 // CHECK1-NEXT: store i16 [[TMP33]], i16* [[TMP32]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 1 // CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: store i32 [[TMP35]], i32* [[TMP34]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 2 // CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 [[TMP37]], i32* [[TMP36]], align 4 // CHECK1-NEXT: [[TMP38:%.*]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i64 120, i64 12, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*), i64 -1) @@ -406,10 +406,10 @@ // CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP39]], i32 0, i32 0 // CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP40]], i32 0, i32 0 // CHECK1-NEXT: [[TMP42:%.*]] = load i8*, i8** [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* +// CHECK1-NEXT: [[TMP43:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP42]], i8* align 4 [[TMP43]], i64 12, i1 false) // CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP39]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP42]] to %struct.anon* +// CHECK1-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP42]] to %struct.anon.0* // CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP44]], i32 0, i32 0 // CHECK1-NEXT: [[TMP47:%.*]] = bitcast [3 x i8*]* [[TMP46]] to i8* // CHECK1-NEXT: [[TMP48:%.*]] = bitcast i8** [[TMP30]] to i8* @@ -626,7 +626,7 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -637,20 +637,20 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP4]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 2 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -660,51 +660,54 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK1-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -745,7 +748,7 @@ // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i16*, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca [3 x i8*]*, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca [3 x i8*]*, align 8 @@ -763,7 +766,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -776,8 +779,8 @@ // CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !21 // CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !21 // CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !21 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !21 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !21 +// CHECK1-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !21 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !21 // CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !21 // CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !21 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @@ -789,8 +792,8 @@ // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i64 0, i64 0 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP18]], i64 0, i64 0 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP12]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP23]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP24]], align 4 // CHECK1-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10) #[[ATTR3]] @@ -820,23 +823,23 @@ // CHECK1-SAME: (i64 noundef [[A:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -846,54 +849,57 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -901,23 +907,23 @@ // CHECK1-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 2 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK1-NEXT: store i16 [[TMP1]], i16* [[TMP0]], align 2 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -927,56 +933,59 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK1-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK1-NEXT: [[TMP11:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 +// CHECK1-NEXT: store i16 [[CONV3]], i16* [[AA]], align 2 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -985,31 +994,29 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK1-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1019,61 +1026,65 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 -// CHECK1-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK1-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) // CHECK1-NEXT: ret void // // @@ -1090,8 +1101,7 @@ // CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -1112,33 +1122,40 @@ // CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 -// CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 10, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, [10 x float]*, i64, float*, [5 x [10 x double]]*, i64, i64, double*, %struct.TT*, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], [10 x float]* [[TMP0]], i64 [[TMP1]], float* [[TMP2]], [5 x [10 x double]]* [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], double* [[TMP6]], %struct.TT* [[TMP7]], i64 [[TMP11]]) +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [10 x float]* [[TMP0]], [10 x float]** [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store float* [[TMP2]], float** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store [5 x [10 x double]]* [[TMP3]], [5 x [10 x double]]** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK1-NEXT: store i64 [[TMP5]], i64* [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK1-NEXT: store double* [[TMP6]], double** [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK1-NEXT: store %struct.TT* [[TMP7]], %struct.TT** [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 10 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV5]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1148,123 +1165,127 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8 -// CHECK1-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8 -// CHECK1-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load [10 x float]*, [10 x float]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load double*, double** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 9 +// CHECK1-NEXT: [[TMP18:%.*]] = load %struct.TT*, %struct.TT** [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 10 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 8 +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !22 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 4, !llvm.access.group !22 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !22 -// CHECK1-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double -// CHECK1-NEXT: [[ADD10:%.*]] = fadd double [[CONV9]], 1.000000e+00 -// CHECK1-NEXT: [[CONV11:%.*]] = fptrunc double [[ADD10]] to float -// CHECK1-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4, !llvm.access.group !22 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK1-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4, !llvm.access.group !22 -// CHECK1-NEXT: [[CONV13:%.*]] = fpext float [[TMP21]] to double -// CHECK1-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.000000e+00 -// CHECK1-NEXT: [[CONV15:%.*]] = fptrunc double [[ADD14]] to float -// CHECK1-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4, !llvm.access.group !22 -// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX16]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !22 -// CHECK1-NEXT: [[ADD18:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !22 -// CHECK1-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK1-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP23]] -// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX19]], i64 3 -// CHECK1-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8, !llvm.access.group !22 -// CHECK1-NEXT: [[ADD21:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8, !llvm.access.group !22 -// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !22 -// CHECK1-NEXT: [[ADD22:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK1-NEXT: store i64 [[ADD22]], i64* [[X]], align 8, !llvm.access.group !22 -// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !22 -// CHECK1-NEXT: [[CONV23:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[CONV23]], 1 -// CHECK1-NEXT: [[CONV25:%.*]] = trunc i32 [[ADD24]] to i8 -// CHECK1-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8, !llvm.access.group !22 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[A]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP33:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP33]] to double +// CHECK1-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK1-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK1-NEXT: store float [[CONV5]], float* [[ARRAYIDX]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 3 +// CHECK1-NEXT: [[TMP34:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double +// CHECK1-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 +// CHECK1-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float +// CHECK1-NEXT: store float [[CONV9]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP10]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX10]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP35:%.*]] = load double, double* [[ARRAYIDX11]], align 8, !llvm.access.group !22 +// CHECK1-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD12]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !22 +// CHECK1-NEXT: [[TMP36:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP16]], i64 [[TMP36]] +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX13]], i64 3 +// CHECK1-NEXT: [[TMP37:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !22 +// CHECK1-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8, !llvm.access.group !22 +// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP38:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !22 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK1-NEXT: store i64 [[ADD16]], i64* [[X]], align 8, !llvm.access.group !22 +// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP39:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !22 +// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK1-NEXT: store i8 [[CONV19]], i8* [[Y]], align 8, !llvm.access.group !22 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK1-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK1-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK1-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK1-NEXT: store i32 [[ADD27]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK1-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK1-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK1-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) // CHECK1-NEXT: ret void // // @@ -1606,7 +1627,7 @@ // CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -1617,24 +1638,28 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.S1* [[TMP0]], %struct.S1** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store i16* [[TMP3]], i16** [[TMP9]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1644,73 +1669,76 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i16*, i16** [[TMP9]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double -// CHECK1-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: store double [[ADD5]], double* [[A]], align 8 -// CHECK1-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load double, double* [[A6]], align 8 -// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK1-NEXT: store double [[INC]], double* [[A6]], align 8 -// CHECK1-NEXT: [[CONV7:%.*]] = fptosi double [[INC]] to i16 -// CHECK1-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP14]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 -// CHECK1-NEXT: store i16 [[CONV7]], i16* [[ARRAYIDX8]], align 2 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[B]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double +// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: store double [[ADD2]], double* [[A]], align 8 +// CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = load double, double* [[A3]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK1-NEXT: store double [[INC]], double* [[A3]], align 8 +// CHECK1-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// CHECK1-NEXT: [[TMP21:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i64 [[TMP21]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// CHECK1-NEXT: store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) // CHECK1-NEXT: ret void // // @@ -1722,10 +1750,7 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 @@ -1736,145 +1761,149 @@ // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 -// CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 -// CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* -// CHECK1-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], [10 x i32]* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, i16* [[CONV2]], align 2 +// CHECK1-NEXT: store i16 [[TMP6]], i16* [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV3]], align 1 +// CHECK1-NEXT: store i8 [[TMP8]], i8* [[TMP7]], align 2 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP9]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I8:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 -// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] -// CHECK1-NEXT: [[SUB6:%.*]] = sub i32 [[SUB]], 1 -// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB6]], 1 +// CHECK1-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, i16* [[TMP5]], align 8 +// CHECK1-NEXT: store i16 [[TMP6]], i16* [[AA]], align 2 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP7]], align 2 +// CHECK1-NEXT: store i8 [[TMP8]], i8* [[AAA]], align 1 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 -// CHECK1-NEXT: [[SUB7:%.*]] = sub i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB7]], i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add i32 [[TMP17]], 1 -// CHECK1-NEXT: [[CMP11:%.*]] = icmp ult i32 [[TMP16]], [[ADD10]] -// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] +// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2 -// CHECK1-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 -// CHECK1-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK1-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2 -// CHECK1-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1 -// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 -// CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK1-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK1-NEXT: store i32 [[ADD20]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP28]], [[MUL]] +// CHECK1-NEXT: store i32 [[ADD9]], i32* [[I5]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[ADD10]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK1-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 +// CHECK1-NEXT: store i16 [[CONV12]], i16* [[AA]], align 2 +// CHECK1-NEXT: [[TMP32:%.*]] = load i8, i8* [[AAA]], align 1 +// CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 +// CHECK1-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 +// CHECK1-NEXT: store i8 [[CONV15]], i8* [[AAA]], align 1 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP10]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK1-NEXT: store i32 [[ADD16]], i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD21:%.*]] = add i32 [[TMP24]], 1 -// CHECK1-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 +// CHECK1-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) +// CHECK1-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -1886,34 +1915,33 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP5]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1923,67 +1951,71 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 -// CHECK1-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK1-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -2014,7 +2046,7 @@ // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_CASTED4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS6:%.*]] = alloca [1 x i8*], align 4 @@ -2088,13 +2120,13 @@ // CHECK3-NEXT: store i8* null, i8** [[TMP27]], align 4 // CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 0 // CHECK3-NEXT: [[TMP31:%.*]] = load i16, i16* [[AA]], align 2 // CHECK3-NEXT: store i16 [[TMP31]], i16* [[TMP30]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 1 // CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: store i32 [[TMP33]], i32* [[TMP32]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 2 // CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 [[TMP35]], i32* [[TMP34]], align 4 // CHECK3-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 72, i32 12, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*), i64 -1) @@ -2102,10 +2134,10 @@ // CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0 // CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP38]], i32 0, i32 0 // CHECK3-NEXT: [[TMP40:%.*]] = load i8*, i8** [[TMP39]], align 4 -// CHECK3-NEXT: [[TMP41:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* +// CHECK3-NEXT: [[TMP41:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8* // CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP40]], i8* align 4 [[TMP41]], i32 12, i1 false) // CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP43:%.*]] = bitcast i8* [[TMP40]] to %struct.anon* +// CHECK3-NEXT: [[TMP43:%.*]] = bitcast i8* [[TMP40]] to %struct.anon.0* // CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP42]], i32 0, i32 0 // CHECK3-NEXT: [[TMP45:%.*]] = bitcast [3 x i64]* [[TMP44]] to i8* // CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP45]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false) @@ -2320,7 +2352,7 @@ // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -2329,20 +2361,20 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined. to void (i32*, i32*, ...)*), i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 2 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2352,51 +2384,54 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK3-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -2437,7 +2472,7 @@ // CHECK3-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 4 // CHECK3-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 4 // CHECK3-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i16*, align 4 // CHECK3-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca [3 x i8*]*, align 4 // CHECK3-NEXT: [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca [3 x i8*]*, align 4 @@ -2455,7 +2490,7 @@ // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK3-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -2468,8 +2503,8 @@ // CHECK3-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !22 // CHECK3-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !22 // CHECK3-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !22 -// CHECK3-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !22 -// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !22 +// CHECK3-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 4, !noalias !22 +// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 4, !noalias !22 // CHECK3-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !22 // CHECK3-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !22 // CHECK3-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @@ -2481,8 +2516,8 @@ // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i32 0, i32 0 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP18]], i32 0, i32 0 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP12]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 2 // CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP23]], align 4 // CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP24]], align 4 // CHECK3-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10) #[[ATTR3]] @@ -2510,21 +2545,22 @@ // CHECK3-SAME: (i32 noundef [[A:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2534,53 +2570,57 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -2588,23 +2628,23 @@ // CHECK3-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 2 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK3-NEXT: store i16 [[TMP1]], i16* [[TMP0]], align 2 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2614,56 +2654,59 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK3-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 -// CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK3-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK3-NEXT: [[TMP11:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK3-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 +// CHECK3-NEXT: store i16 [[CONV3]], i16* [[AA]], align 2 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -2672,29 +2715,28 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP1]], i32 [[TMP3]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK3-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2704,60 +2746,65 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 -// CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK3-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) // CHECK3-NEXT: ret void // // @@ -2774,8 +2821,7 @@ // CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -2794,31 +2840,40 @@ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 10, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [10 x float]*, i32, float*, [5 x [10 x double]]*, i32, i32, double*, %struct.TT*, i32)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP9]], [10 x float]* [[TMP0]], i32 [[TMP1]], float* [[TMP2]], [5 x [10 x double]]* [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], double* [[TMP6]], %struct.TT* [[TMP7]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store [10 x float]* [[TMP0]], [10 x float]** [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[TMP2]], float** [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store [5 x [10 x double]]* [[TMP3]], [5 x [10 x double]]** [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK3-NEXT: store double* [[TMP6]], double** [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK3-NEXT: store %struct.TT* [[TMP7]], %struct.TT** [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2828,121 +2883,127 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4 -// CHECK3-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4 -// CHECK3-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load [10 x float]*, [10 x float]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 7 +// CHECK3-NEXT: [[TMP16:%.*]] = load double*, double** [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load %struct.TT*, %struct.TT** [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 9 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[A_ADDR]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double -// CHECK3-NEXT: [[ADD8:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[A]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP33:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP33]] to double +// CHECK3-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK3-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK3-NEXT: store float [[CONV5]], float* [[ARRAYIDX]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP8]], i32 3 +// CHECK3-NEXT: [[TMP34:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double +// CHECK3-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK3-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK3-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK3-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK3-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK3-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK3-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !23 -// CHECK3-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !23 -// CHECK3-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK3-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP23]] -// CHECK3-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i32 3 -// CHECK3-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !23 -// CHECK3-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !23 -// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK3-NEXT: store i64 [[ADD20]], i64* [[X]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK3-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK3-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK3-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: store float [[CONV9]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP10]], i32 0, i32 1 +// CHECK3-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX10]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP35:%.*]] = load double, double* [[ARRAYIDX11]], align 8, !llvm.access.group !23 +// CHECK3-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD12]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !23 +// CHECK3-NEXT: [[TMP36:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK3-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP16]], i32 [[TMP36]] +// CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX13]], i32 3 +// CHECK3-NEXT: [[TMP37:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !23 +// CHECK3-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8, !llvm.access.group !23 +// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP18]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP38:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK3-NEXT: store i64 [[ADD16]], i64* [[X]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP18]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP39:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK3-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK3-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK3-NEXT: store i8 [[CONV19]], i8* [[Y]], align 4, !llvm.access.group !23 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK3-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK3-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK3-NEXT: store i32 [[ADD25]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK3-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK3-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK3-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) // CHECK3-NEXT: ret void // // @@ -3280,7 +3341,7 @@ // CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -3290,23 +3351,28 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.S1* [[TMP0]], %struct.S1** [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store i16* [[TMP3]], i16** [[TMP9]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3316,72 +3382,76 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i16*, i16** [[TMP9]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double -// CHECK3-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: store double [[ADD4]], double* [[A]], align 4 -// CHECK3-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load double, double* [[A5]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK3-NEXT: store double [[INC]], double* [[A5]], align 4 -// CHECK3-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 -// CHECK3-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP14]] -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 -// CHECK3-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[B]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double +// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: store double [[ADD2]], double* [[A]], align 4 +// CHECK3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP20:%.*]] = load double, double* [[A3]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK3-NEXT: store double [[INC]], double* [[A3]], align 4 +// CHECK3-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// CHECK3-NEXT: [[TMP21:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i32 [[TMP21]] +// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// CHECK3-NEXT: store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) // CHECK3-NEXT: ret void // // @@ -3393,10 +3463,7 @@ // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 @@ -3405,141 +3472,149 @@ // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* // CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 -// CHECK3-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* -// CHECK3-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]], [10 x i32]* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK3-NEXT: store i16 [[TMP6]], i16* [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV1]], align 1 +// CHECK3-NEXT: store i8 [[TMP8]], i8* [[TMP7]], align 2 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP9]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 -// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] -// CHECK3-NEXT: [[SUB4:%.*]] = sub i32 [[SUB]], 1 -// CHECK3-NEXT: [[ADD:%.*]] = add i32 [[SUB4]], 1 +// CHECK3-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, i16* [[TMP5]], align 4 +// CHECK3-NEXT: store i16 [[TMP6]], i16* [[AA]], align 2 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP7]], align 2 +// CHECK3-NEXT: store i8 [[TMP8]], i8* [[AAA]], align 1 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK3-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 -// CHECK3-NEXT: [[SUB5:%.*]] = sub i32 [[DIV]], 1 -// CHECK3-NEXT: store i32 [[SUB5]], i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK3-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add i32 [[TMP17]], 1 -// CHECK3-NEXT: [[CMP9:%.*]] = icmp ult i32 [[TMP16]], [[ADD8]] -// CHECK3-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK3-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] +// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK3-NEXT: [[ADD10:%.*]] = add i32 [[TMP18]], [[MUL]] -// CHECK3-NEXT: store i32 [[ADD10]], i32* [[I6]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK3-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 -// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 -// CHECK3-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK3-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 -// CHECK3-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1 -// CHECK3-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 -// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 -// CHECK3-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK3-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK3-NEXT: store i32 [[ADD18]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK3-NEXT: [[ADD9:%.*]] = add i32 [[TMP28]], [[MUL]] +// CHECK3-NEXT: store i32 [[ADD9]], i32* [[I5]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD10]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 +// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK3-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 +// CHECK3-NEXT: store i16 [[CONV12]], i16* [[AA]], align 2 +// CHECK3-NEXT: [[TMP32:%.*]] = load i8, i8* [[AAA]], align 1 +// CHECK3-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 +// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 +// CHECK3-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 +// CHECK3-NEXT: store i8 [[CONV15]], i8* [[AAA]], align 1 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP10]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK3-NEXT: store i32 [[ADD16]], i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD19:%.*]] = add i32 [[TMP24]], 1 -// CHECK3-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 +// CHECK3-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) +// CHECK3-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -3551,32 +3626,32 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP5]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3586,66 +3661,71 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 -// CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK3-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK3-NEXT: ret void // // @@ -3662,7 +3742,7 @@ // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -3673,20 +3753,20 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK9-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP4]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK9-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 2 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3696,51 +3776,54 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK9-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK9-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // @@ -3748,23 +3831,23 @@ // CHECK9-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK9-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK9-NEXT: store i16 [[TMP1]], i16* [[TMP0]], align 2 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3774,56 +3857,59 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK9-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK9-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK9-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK9-NEXT: [[TMP11:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 +// CHECK9-NEXT: store i16 [[CONV3]], i16* [[AA]], align 2 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK9-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // @@ -3832,31 +3918,29 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK9-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP3]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK9-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3866,61 +3950,65 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 -// CHECK9-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK9-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[A]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK9-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) // CHECK9-NEXT: ret void // // @@ -3937,8 +4025,7 @@ // CHECK9-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 // CHECK9-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK9-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -3959,33 +4046,40 @@ // CHECK9-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 -// CHECK9-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 10, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, [10 x float]*, i64, float*, [5 x [10 x double]]*, i64, i64, double*, %struct.TT*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], [10 x float]* [[TMP0]], i64 [[TMP1]], float* [[TMP2]], [5 x [10 x double]]* [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], double* [[TMP6]], %struct.TT* [[TMP7]], i64 [[TMP11]]) +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [10 x float]* [[TMP0]], [10 x float]** [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP1]], i64* [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store float* [[TMP2]], float** [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store [5 x [10 x double]]* [[TMP3]], [5 x [10 x double]]** [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK9-NEXT: store i64 [[TMP5]], i64* [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK9-NEXT: store double* [[TMP6]], double** [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK9-NEXT: store %struct.TT* [[TMP7]], %struct.TT** [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 10 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV5]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 -// CHECK9-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3995,123 +4089,127 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK9-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8 -// CHECK9-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8 -// CHECK9-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8 -// CHECK9-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP4:%.*]] = load [10 x float]*, [10 x float]** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP10:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 7 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 8 +// CHECK9-NEXT: [[TMP16:%.*]] = load double*, double** [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 9 +// CHECK9-NEXT: [[TMP18:%.*]] = load %struct.TT*, %struct.TT** [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 10 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 8 +// CHECK9-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double -// CHECK9-NEXT: [[ADD10:%.*]] = fadd double [[CONV9]], 1.000000e+00 -// CHECK9-NEXT: [[CONV11:%.*]] = fptrunc double [[ADD10]] to float -// CHECK9-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK9-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[CONV13:%.*]] = fpext float [[TMP21]] to double -// CHECK9-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.000000e+00 -// CHECK9-NEXT: [[CONV15:%.*]] = fptrunc double [[ADD14]] to float -// CHECK9-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 -// CHECK9-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX16]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !13 -// CHECK9-NEXT: [[ADD18:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK9-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !13 -// CHECK9-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK9-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP23]] -// CHECK9-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX19]], i64 3 -// CHECK9-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8, !llvm.access.group !13 -// CHECK9-NEXT: [[ADD21:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK9-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8, !llvm.access.group !13 -// CHECK9-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !13 -// CHECK9-NEXT: [[ADD22:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK9-NEXT: store i64 [[ADD22]], i64* [[X]], align 8, !llvm.access.group !13 -// CHECK9-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !13 -// CHECK9-NEXT: [[CONV23:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK9-NEXT: [[ADD24:%.*]] = add nsw i32 [[CONV23]], 1 -// CHECK9-NEXT: [[CONV25:%.*]] = trunc i32 [[ADD24]] to i8 -// CHECK9-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[A]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP33:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[CONV:%.*]] = fpext float [[TMP33]] to double +// CHECK9-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK9-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK9-NEXT: store float [[CONV5]], float* [[ARRAYIDX]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 3 +// CHECK9-NEXT: [[TMP34:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double +// CHECK9-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 +// CHECK9-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float +// CHECK9-NEXT: store float [[CONV9]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP10]], i64 0, i64 1 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX10]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP35:%.*]] = load double, double* [[ARRAYIDX11]], align 8, !llvm.access.group !13 +// CHECK9-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK9-NEXT: store double [[ADD12]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP36:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK9-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP16]], i64 [[TMP36]] +// CHECK9-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX13]], i64 3 +// CHECK9-NEXT: [[TMP37:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !13 +// CHECK9-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK9-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8, !llvm.access.group !13 +// CHECK9-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP18]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !13 +// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK9-NEXT: store i64 [[ADD16]], i64* [[X]], align 8, !llvm.access.group !13 +// CHECK9-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP18]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP39:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !13 +// CHECK9-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK9-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK9-NEXT: store i8 [[CONV19]], i8* [[Y]], align 8, !llvm.access.group !13 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK9-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK9-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD27:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK9-NEXT: store i32 [[ADD27]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK9-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK9-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK9-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) // CHECK9-NEXT: ret void // // @@ -4123,10 +4221,7 @@ // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 @@ -4137,145 +4232,149 @@ // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 -// CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK9-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 -// CHECK9-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* -// CHECK9-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, [10 x i32]*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i16, i16* [[CONV2]], align 2 +// CHECK9-NEXT: store i16 [[TMP6]], i16* [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV3]], align 1 +// CHECK9-NEXT: store i8 [[TMP8]], i8* [[TMP7]], align 2 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP9]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK9-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I8:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] -// CHECK9-NEXT: [[SUB6:%.*]] = sub i32 [[SUB]], 1 -// CHECK9-NEXT: [[ADD:%.*]] = add i32 [[SUB6]], 1 +// CHECK9-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[A]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i16, i16* [[TMP5]], align 8 +// CHECK9-NEXT: store i16 [[TMP6]], i16* [[AA]], align 2 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP7]], align 2 +// CHECK9-NEXT: store i8 [[TMP8]], i8* [[AAA]], align 1 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[A]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK9-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 -// CHECK9-NEXT: [[SUB7:%.*]] = sub i32 [[DIV]], 1 -// CHECK9-NEXT: store i32 [[SUB7]], i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK9-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[I]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[ADD10:%.*]] = add i32 [[TMP17]], 1 -// CHECK9-NEXT: [[CMP11:%.*]] = icmp ult i32 [[TMP16]], [[ADD10]] -// CHECK9-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK9-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] -// CHECK9-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD13]], i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2 -// CHECK9-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 -// CHECK9-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 -// CHECK9-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK9-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2 -// CHECK9-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1 -// CHECK9-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 -// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 -// CHECK9-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK9-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK9-NEXT: store i32 [[ADD20]], i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK9-NEXT: [[ADD9:%.*]] = add i32 [[TMP28]], [[MUL]] +// CHECK9-NEXT: store i32 [[ADD9]], i32* [[I5]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[A]], align 4 +// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK9-NEXT: store i32 [[ADD10]], i32* [[A]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 +// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK9-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 +// CHECK9-NEXT: store i16 [[CONV12]], i16* [[AA]], align 2 +// CHECK9-NEXT: [[TMP32:%.*]] = load i8, i8* [[AAA]], align 1 +// CHECK9-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 +// CHECK9-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 +// CHECK9-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 +// CHECK9-NEXT: store i8 [[CONV15]], i8* [[AAA]], align 1 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP10]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK9-NEXT: store i32 [[ADD16]], i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD21:%.*]] = add i32 [[TMP24]], 1 -// CHECK9-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 +// CHECK9-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) +// CHECK9-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -4289,7 +4388,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK9-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -4300,24 +4399,28 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]]) +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store %struct.S1* [[TMP0]], %struct.S1** [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], i64* [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP2]], i64* [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i16* [[TMP3]], i16** [[TMP9]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 +// CHECK9-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4327,73 +4430,76 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i16*, i16** [[TMP9]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double -// CHECK9-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK9-NEXT: store double [[ADD5]], double* [[A]], align 8 -// CHECK9-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP13:%.*]] = load double, double* [[A6]], align 8 -// CHECK9-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK9-NEXT: store double [[INC]], double* [[A6]], align 8 -// CHECK9-NEXT: [[CONV7:%.*]] = fptosi double [[INC]] to i16 -// CHECK9-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP14]] -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 -// CHECK9-NEXT: store i16 [[CONV7]], i16* [[ARRAYIDX8]], align 2 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[B]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double +// CHECK9-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK9-NEXT: store double [[ADD2]], double* [[A]], align 8 +// CHECK9-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP20:%.*]] = load double, double* [[A3]], align 8 +// CHECK9-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK9-NEXT: store double [[INC]], double* [[A3]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// CHECK9-NEXT: [[TMP21:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i64 [[TMP21]] +// CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// CHECK9-NEXT: store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) // CHECK9-NEXT: ret void // // @@ -4403,34 +4509,33 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK9-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK9-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP5]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4440,67 +4545,71 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 -// CHECK9-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK9-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK9-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK9-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // // @@ -4510,7 +4619,7 @@ // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) // CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -4519,20 +4628,20 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK11-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined. to void (i32*, i32*, ...)*), i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK11-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 2 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4542,51 +4651,54 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK11-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // @@ -4594,23 +4706,23 @@ // CHECK11-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK11-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP1]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK11-NEXT: store i16 [[TMP1]], i16* [[TMP0]], align 2 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4620,56 +4732,59 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK11-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 -// CHECK11-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK11-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK11-NEXT: [[TMP11:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK11-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 +// CHECK11-NEXT: store i16 [[CONV3]], i16* [[AA]], align 2 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK11-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // @@ -4678,29 +4793,28 @@ // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK11-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32 [[TMP1]], i32 [[TMP3]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK11-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4710,60 +4824,65 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK11-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 -// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 -// CHECK11-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK11-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[A]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK11-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK11-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK11-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) // CHECK11-NEXT: ret void // // @@ -4780,8 +4899,7 @@ // CHECK11-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 // CHECK11-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -4800,31 +4918,40 @@ // CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 10, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [10 x float]*, i32, float*, [5 x [10 x double]]*, i32, i32, double*, %struct.TT*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP9]], [10 x float]* [[TMP0]], i32 [[TMP1]], float* [[TMP2]], [5 x [10 x double]]* [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], double* [[TMP6]], %struct.TT* [[TMP7]], i32 [[TMP11]]) +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store [10 x float]* [[TMP0]], [10 x float]** [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP1]], i32* [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store float* [[TMP2]], float** [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store [5 x [10 x double]]* [[TMP3]], [5 x [10 x double]]** [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK11-NEXT: store double* [[TMP6]], double** [[TMP16]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK11-NEXT: store %struct.TT* [[TMP7]], %struct.TT** [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 -// CHECK11-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4834,121 +4961,127 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4 -// CHECK11-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4 -// CHECK11-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4 -// CHECK11-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load [10 x float]*, [10 x float]** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 7 +// CHECK11-NEXT: [[TMP16:%.*]] = load double*, double** [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load %struct.TT*, %struct.TT** [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 9 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: store i32 [[ADD7]], i32* [[A_ADDR]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double -// CHECK11-NEXT: [[ADD8:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[A]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP33:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[CONV:%.*]] = fpext float [[TMP33]] to double +// CHECK11-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK11-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK11-NEXT: store float [[CONV5]], float* [[ARRAYIDX]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP8]], i32 3 +// CHECK11-NEXT: [[TMP34:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double +// CHECK11-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK11-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK11-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK11-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK11-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK11-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK11-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 -// CHECK11-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !14 -// CHECK11-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK11-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !14 -// CHECK11-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK11-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP23]] -// CHECK11-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i32 3 -// CHECK11-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !14 -// CHECK11-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK11-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !14 -// CHECK11-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK11-NEXT: store i64 [[ADD20]], i64* [[X]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK11-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK11-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK11-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK11-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: store float [[CONV9]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP10]], i32 0, i32 1 +// CHECK11-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX10]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP35:%.*]] = load double, double* [[ARRAYIDX11]], align 8, !llvm.access.group !14 +// CHECK11-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK11-NEXT: store double [[ADD12]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !14 +// CHECK11-NEXT: [[TMP36:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK11-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP16]], i32 [[TMP36]] +// CHECK11-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX13]], i32 3 +// CHECK11-NEXT: [[TMP37:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !14 +// CHECK11-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK11-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8, !llvm.access.group !14 +// CHECK11-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP18]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP38:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK11-NEXT: store i64 [[ADD16]], i64* [[X]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP18]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP39:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK11-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK11-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK11-NEXT: store i8 [[CONV19]], i8* [[Y]], align 4, !llvm.access.group !14 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK11-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK11-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK11-NEXT: store i32 [[ADD25]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK11-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK11-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK11-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) // CHECK11-NEXT: ret void // // @@ -4960,10 +5093,7 @@ // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 @@ -4972,141 +5102,149 @@ // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK11-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 -// CHECK11-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* -// CHECK11-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, [10 x i32]*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]], [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK11-NEXT: store i16 [[TMP6]], i16* [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV1]], align 1 +// CHECK11-NEXT: store i8 [[TMP8]], i8* [[TMP7]], align 2 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP9]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK11-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] -// CHECK11-NEXT: [[SUB4:%.*]] = sub i32 [[SUB]], 1 -// CHECK11-NEXT: [[ADD:%.*]] = add i32 [[SUB4]], 1 +// CHECK11-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[A]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i16, i16* [[TMP5]], align 4 +// CHECK11-NEXT: store i16 [[TMP6]], i16* [[AA]], align 2 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP7]], align 2 +// CHECK11-NEXT: store i8 [[TMP8]], i8* [[AAA]], align 1 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[A]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK11-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 -// CHECK11-NEXT: [[SUB5:%.*]] = sub i32 [[DIV]], 1 -// CHECK11-NEXT: store i32 [[SUB5]], i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK11-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[I]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add i32 [[TMP17]], 1 -// CHECK11-NEXT: [[CMP9:%.*]] = icmp ult i32 [[TMP16]], [[ADD8]] -// CHECK11-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK11-NEXT: [[ADD10:%.*]] = add i32 [[TMP18]], [[MUL]] -// CHECK11-NEXT: store i32 [[ADD10]], i32* [[I6]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 -// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 -// CHECK11-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK11-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 -// CHECK11-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1 -// CHECK11-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 -// CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 -// CHECK11-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK11-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK11-NEXT: store i32 [[ADD18]], i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK11-NEXT: [[ADD9:%.*]] = add i32 [[TMP28]], [[MUL]] +// CHECK11-NEXT: store i32 [[ADD9]], i32* [[I5]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[A]], align 4 +// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK11-NEXT: store i32 [[ADD10]], i32* [[A]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 +// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK11-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 +// CHECK11-NEXT: store i16 [[CONV12]], i16* [[AA]], align 2 +// CHECK11-NEXT: [[TMP32:%.*]] = load i8, i8* [[AAA]], align 1 +// CHECK11-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 +// CHECK11-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 +// CHECK11-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 +// CHECK11-NEXT: store i8 [[CONV15]], i8* [[AAA]], align 1 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP10]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK11-NEXT: store i32 [[ADD16]], i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD19:%.*]] = add i32 [[TMP24]], 1 -// CHECK11-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 +// CHECK11-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) +// CHECK11-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -5120,7 +5258,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -5130,23 +5268,28 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]]) +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store %struct.S1* [[TMP0]], %struct.S1** [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP1]], i32* [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i16* [[TMP3]], i16** [[TMP9]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5156,72 +5299,76 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i16*, i16** [[TMP9]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double -// CHECK11-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK11-NEXT: store double [[ADD4]], double* [[A]], align 4 -// CHECK11-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP13:%.*]] = load double, double* [[A5]], align 4 -// CHECK11-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK11-NEXT: store double [[INC]], double* [[A5]], align 4 -// CHECK11-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 -// CHECK11-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP14]] -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 -// CHECK11-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[B]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double +// CHECK11-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK11-NEXT: store double [[ADD2]], double* [[A]], align 4 +// CHECK11-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP20:%.*]] = load double, double* [[A3]], align 4 +// CHECK11-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK11-NEXT: store double [[INC]], double* [[A3]], align 4 +// CHECK11-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// CHECK11-NEXT: [[TMP21:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i32 [[TMP21]] +// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// CHECK11-NEXT: store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) // CHECK11-NEXT: ret void // // @@ -5231,32 +5378,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK11-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK11-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK11-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP5]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5266,65 +5413,70 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK11-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 -// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 -// CHECK11-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK11-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK11-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK11-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK11-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK11-NEXT: ret void // diff --git a/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp @@ -143,18 +143,21 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -166,68 +169,70 @@ // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 56087, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 456 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 // CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK1-NEXT: store i32 [[ADD6]], i32* [[J]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], [123 x [456 x i32]]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [456 x i32], [456 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX8]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -287,18 +292,21 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -310,66 +318,68 @@ // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 56087, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 456 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK3-NEXT: store i32 [[ADD6]], i32* [[J]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], [123 x [456 x i32]]* [[A]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], [456 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], [123 x [456 x i32]]* [[A]], i32 0, i32 [[TMP13]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4 +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], [456 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP14]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX7]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -516,8 +526,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[M]], i64* [[M_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -528,161 +537,167 @@ // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 4 -// CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[M_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[M_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP1]], i64* [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32* [[TMP2]], i32** [[TMP9]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I13:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J14:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[M]], i64* [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[M_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[M]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 -// CHECK9-NEXT: [[CONV10:%.*]] = sext i32 [[DIV9]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV7]], [[CONV10]] -// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB11]], i64* [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 // CHECK9-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[CMP12:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: store i64 [[TMP17]], i64* [[DOTOMP_UB]], align 8 // CHECK9-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK9-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP16]], i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP24]], i64* [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] -// CHECK9-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP20]], 0 -// CHECK9-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 -// CHECK9-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] -// CHECK9-NEXT: [[CONV20:%.*]] = sext i32 [[MUL19]] to i64 -// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i64 [[TMP19]], [[CONV20]] -// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]] -// CHECK9-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK9-NEXT: store i32 [[CONV23]], i32* [[I13]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP23]], 0 -// CHECK9-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 -// CHECK9-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] -// CHECK9-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 -// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP22]], [[CONV27]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK9-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 -// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] -// CHECK9-NEXT: [[CONV32:%.*]] = sext i32 [[MUL31]] to i64 -// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[DIV28]], [[CONV32]] -// CHECK9-NEXT: [[SUB34:%.*]] = sub nsw i64 [[TMP21]], [[MUL33]] -// CHECK9-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 -// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] -// CHECK9-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 -// CHECK9-NEXT: store i32 [[CONV37]], i32* [[J14]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[I13]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 -// CHECK9-NEXT: [[TMP26:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP1]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[TMP26]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[J14]], align 4 -// CHECK9-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i64 [[IDXPROM38]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX39]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK9-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK9-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP27]], [[CONV16]] +// CHECK9-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK9-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK9-NEXT: store i32 [[CONV19]], i32* [[I9]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK9-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK9-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP30]], [[CONV23]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK9-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] +// CHECK9-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK9-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP29]], [[MUL29]] +// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 +// CHECK9-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] +// CHECK9-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK9-NEXT: store i32 [[CONV33]], i32* [[J10]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[I9]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[TMP34:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP8]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[J10]], align 4 +// CHECK9-NEXT: [[IDXPROM34:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK9-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i64 [[IDXPROM34]] +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX35]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[ADD40:%.*]] = add nsw i64 [[TMP28]], 1 -// CHECK9-NEXT: store i64 [[ADD40]], i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 [[TMP36]], 1 +// CHECK9-NEXT: store i64 [[ADD36]], i64* [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -724,18 +739,21 @@ // CHECK9-SAME: ([10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x [2 x i32]]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [10 x [2 x i32]]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [10 x [2 x i32]]* [[TMP0]], [10 x [2 x i32]]** [[TMP1]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -747,67 +765,69 @@ // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 19, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 2 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 // CHECK9-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK9-NEXT: store i32 [[ADD6]], i32* [[J]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP2]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP14]] to i64 // CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX8]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // @@ -951,8 +971,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -961,155 +980,165 @@ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[M_CASTED]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[M_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[M_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], i32* [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32* [[TMP2]], i32** [[TMP9]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[M]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 // CHECK11-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP17]], i64* [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP16]], i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP24]], i64* [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] -// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP20]], 0 -// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK11-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] -// CHECK11-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 -// CHECK11-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP19]], [[CONV18]] -// CHECK11-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] -// CHECK11-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK11-NEXT: store i32 [[CONV21]], i32* [[I11]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP23]], 0 -// CHECK11-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 -// CHECK11-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] -// CHECK11-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 -// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP22]], [[CONV25]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 -// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] -// CHECK11-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 -// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] -// CHECK11-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP21]], [[MUL31]] -// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 -// CHECK11-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] -// CHECK11-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 -// CHECK11-NEXT: store i32 [[CONV35]], i32* [[J12]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[I11]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = mul nsw i32 [[TMP25]], [[TMP1]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[J12]], align 4 -// CHECK11-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i32 [[TMP27]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX36]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK11-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK11-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK11-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK11-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP27]], [[CONV16]] +// CHECK11-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK11-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK11-NEXT: store i32 [[CONV19]], i32* [[I9]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK11-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK11-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK11-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK11-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP30]], [[CONV23]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK11-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] +// CHECK11-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK11-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP29]], [[MUL29]] +// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 +// CHECK11-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] +// CHECK11-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK11-NEXT: store i32 [[CONV33]], i32* [[J10]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[I9]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = mul nsw i32 [[TMP33]], [[TMP8]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[J10]], align 4 +// CHECK11-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i32 [[TMP35]] +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX34]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[ADD37:%.*]] = add nsw i64 [[TMP28]], 1 -// CHECK11-NEXT: store i64 [[ADD37]], i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[ADD35:%.*]] = add nsw i64 [[TMP36]], 1 +// CHECK11-NEXT: store i64 [[ADD35]], i64* [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -1151,18 +1180,21 @@ // CHECK11-SAME: ([10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x [2 x i32]]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [10 x [2 x i32]]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [10 x [2 x i32]]* [[TMP0]], [10 x [2 x i32]]** [[TMP1]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1174,65 +1206,67 @@ // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 19, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 2 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 // CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK11-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK11-NEXT: store i32 [[ADD6]], i32* [[J]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP12]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP2]], i32 0, i32 [[TMP13]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4 +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP14]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX7]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp @@ -215,18 +215,21 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -236,56 +239,58 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -293,18 +298,21 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -314,56 +322,58 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -371,18 +381,21 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -392,73 +405,75 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -562,18 +577,21 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -583,55 +601,57 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP9]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -639,18 +659,21 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -660,55 +683,57 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP9]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -716,18 +741,21 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -737,72 +765,74 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP13]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -1049,29 +1079,31 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1084,76 +1116,79 @@ // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) +// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1165,29 +1200,31 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1200,76 +1237,79 @@ // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) +// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1282,8 +1322,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 @@ -1292,129 +1331,134 @@ // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK9-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK9-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK9-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] +// CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] +// CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) +// CHECK9-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1499,18 +1543,21 @@ // CHECK9-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1520,55 +1567,57 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK9-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // @@ -1576,18 +1625,21 @@ // CHECK9-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1597,55 +1649,57 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK9-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // @@ -1653,18 +1707,21 @@ // CHECK9-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1674,72 +1731,74 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 10) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 10) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // @@ -1984,27 +2043,30 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2017,74 +2079,78 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP16]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 [[TMP21]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) +// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -2096,27 +2162,30 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2129,74 +2198,78 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP16]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 [[TMP21]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) +// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -2209,33 +2282,35 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2248,93 +2323,99 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP19]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 [[TMP26]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -2419,18 +2500,21 @@ // CHECK11-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2440,54 +2524,56 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i32 0, i32 [[TMP11]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // @@ -2495,18 +2581,21 @@ // CHECK11-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2516,54 +2605,56 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i32 0, i32 [[TMP11]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // @@ -2571,18 +2662,21 @@ // CHECK11-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2592,71 +2686,73 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 10) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 10) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i32 0, i32 [[TMP13]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp @@ -333,8 +333,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 @@ -345,147 +344,153 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i64 [[TMP4]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP2]], i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S]* [[TMP6]] to %struct.S* +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP8]], %struct.St* noundef [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i64 0, i64 [[IDXPROM8]] -// CHECK1-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX9]] to i8* -// CHECK1-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR5]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP23]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast %struct.S* [[ARRAYIDX5]] to i8* +// CHECK1-NEXT: [[TMP27:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP28]] +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN12]], i64 2 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP25]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP33]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done13: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done9: // CHECK1-NEXT: ret void // // @@ -684,7 +689,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 @@ -694,24 +699,27 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK1-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i64 [[TMP4]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP5]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x %struct.S.0]* [[TMP1]], [2 x %struct.S.0]** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP7]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -719,118 +727,121 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca %struct.S.0*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP7]], align 8 +// CHECK1-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP9]], i8* align 4 [[TMP10]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [2 x %struct.S.0]* [[TMP6]] to %struct.S.0* +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP12]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP11]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.St* noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: [[TMP13:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP13]], %struct.St* noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP16]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i64 0, i64 [[IDXPROM9]] -// CHECK1-NEXT: [[TMP20:%.*]] = bitcast %struct.S.0* [[ARRAYIDX10]] to i8* -// CHECK1-NEXT: [[TMP21:%.*]] = bitcast %struct.S.0* [[TMP18]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast %struct.S.0* [[ARRAYIDX7]] to i8* +// CHECK1-NEXT: [[TMP27:%.*]] = bitcast %struct.S.0* [[TMP24]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN12]], i64 2 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN9]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP25]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done13: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done10: // CHECK1-NEXT: ret void // // @@ -1121,8 +1132,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 @@ -1131,141 +1141,151 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32 [[TMP4]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP2]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC1]] to i8* -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK3-NEXT: [[TMP12:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S]* [[TMP6]] to %struct.S* +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP8]], %struct.St* noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC1]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 [[TMP17]] -// CHECK3-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* -// CHECK3-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR4]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false) -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP24]] +// CHECK3-NEXT: store i32 [[TMP23]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP25]] +// CHECK3-NEXT: [[TMP26:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* +// CHECK3-NEXT: [[TMP27:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP29]], [[TMP28]] +// CHECK3-NEXT: store i32 [[ADD5]], i32* [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i32 2 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP25]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP33]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done11: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done8: // CHECK3-NEXT: ret void // // @@ -1463,7 +1483,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 @@ -1472,23 +1492,27 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 // CHECK3-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32 [[TMP4]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP5]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [2 x %struct.S.0]* [[TMP1]], [2 x %struct.S.0]** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP7]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1496,115 +1520,119 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca %struct.S.0*, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP7]], align 4 +// CHECK3-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: [[TMP9:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK3-NEXT: [[TMP10:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 [[TMP10]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = bitcast [2 x %struct.S.0]* [[TMP6]] to %struct.S.0* +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP12]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP11]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.St* noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: [[TMP13:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP13]], %struct.St* noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP17]] -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 [[TMP19]] -// CHECK3-NEXT: [[TMP20:%.*]] = bitcast %struct.S.0* [[ARRAYIDX9]] to i8* -// CHECK3-NEXT: [[TMP21:%.*]] = bitcast %struct.S.0* [[TMP18]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP23]] +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP25]] +// CHECK3-NEXT: [[TMP26:%.*]] = bitcast %struct.S.0* [[ARRAYIDX6]] to i8* +// CHECK3-NEXT: [[TMP27:%.*]] = bitcast %struct.S.0* [[TMP24]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 4, i1 false) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN11]], i32 2 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN8]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP25]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done12: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done9: // CHECK3-NEXT: ret void // // @@ -1833,9 +1861,7 @@ // CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK9-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 // CHECK9-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 @@ -1843,34 +1869,31 @@ // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load volatile i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* @g, align 4 +// CHECK9-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]]) +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK9-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 @@ -1879,68 +1902,73 @@ // CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[G_ADDR]] to i32* -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[G]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[G1]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], i32* [[SIVAR]], align 4 +// CHECK9-NEXT: store i32* [[G1]], i32** [[TMP]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] -// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: store i32 1, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK9-NEXT: store volatile i32 1, i32* [[TMP8]], align 4 -// CHECK9-NEXT: store i32 2, i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK9-NEXT: store i32* [[TMP11]], i32** [[TMP10]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store i32* [[CONV2]], i32** [[TMP12]], align 8 +// CHECK9-NEXT: store i32 1, i32* [[G]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK9-NEXT: store volatile i32 1, i32* [[TMP15]], align 4 +// CHECK9-NEXT: store i32 2, i32* [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[G]], i32** [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK9-NEXT: store i32* [[TMP18]], i32** [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[TMP19]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK9-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp @@ -160,10 +160,7 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], i64* [[SVAR_ADDR]], align 8 @@ -173,134 +170,137 @@ // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SFVAR_ADDR]] to float* // CHECK1-NEXT: store double* [[CONV1]], double** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double, double* [[CONV]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[G_CASTED]] to double* -// CHECK1-NEXT: store double [[TMP0]], double* [[CONV4]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load volatile double, double* [[TMP2]], align 8 -// CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[G1_CASTED]] to double* -// CHECK1-NEXT: store double [[TMP3]], double* [[CONV5]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV6]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load float, float* [[CONV3]], align 4 -// CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[SFVAR_CASTED]] to float* -// CHECK1-NEXT: store float [[TMP7]], float* [[CONV7]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load double, double* [[CONV]], align 8 +// CHECK1-NEXT: store double [[TMP1]], double* [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load volatile double, double* [[TMP3]], align 8 +// CHECK1-NEXT: store double [[TMP4]], double* [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV2]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float, float* [[CONV3]], align 4 +// CHECK1-NEXT: store float [[TMP8]], float* [[TMP7]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SVAR:%.*]], i64 noundef [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G5:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G16:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP7:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR9:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G2:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G13:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SVAR]], i64* [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SFVAR]], i64* [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[G_ADDR]] to double* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to double* -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SFVAR_ADDR]] to float* -// CHECK1-NEXT: store double* [[CONV1]], double** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load double, double* [[TMP1]], align 8 +// CHECK1-NEXT: store double [[TMP2]], double* [[G]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double, double* [[TMP3]], align 8 +// CHECK1-NEXT: store double [[TMP4]], double* [[G1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float, float* [[TMP7]], align 4 +// CHECK1-NEXT: store float [[TMP8]], float* [[SFVAR]], align 4 +// CHECK1-NEXT: store double* [[G1]], double** [[TMP]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: store double* [[G16]], double** [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[G13]], double** [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: store double 1.000000e+00, double* [[G5]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load double*, double** [[_TMP7]], align 8 -// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP9]], align 8 -// CHECK1-NEXT: store i32 3, i32* [[SVAR8]], align 4 -// CHECK1-NEXT: store float 4.000000e+00, float* [[SFVAR9]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double* [[G5]], double** [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP12:%.*]] = load double*, double** [[_TMP7]], align 8 -// CHECK1-NEXT: store double* [[TMP12]], double** [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store i32* [[SVAR8]], i32** [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store float* [[SFVAR9]], float** [[TMP14]], align 8 +// CHECK1-NEXT: store double 1.000000e+00, double* [[G2]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load double*, double** [[_TMP4]], align 8 +// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP18]], align 8 +// CHECK1-NEXT: store i32 3, i32* [[SVAR5]], align 4 +// CHECK1-NEXT: store float 4.000000e+00, float* [[SFVAR6]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store double* [[G2]], double** [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load double*, double** [[_TMP4]], align 8 +// CHECK1-NEXT: store double* [[TMP21]], double** [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[SVAR5]], i32** [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[SFVAR6]], float** [[TMP23]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP18:%.*]] = load double, double* [[G5]], align 8 -// CHECK1-NEXT: store volatile double [[TMP18]], double* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load double*, double** [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load double, double* [[TMP19]], align 8 -// CHECK1-NEXT: store volatile double [[TMP20]], double* [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK1-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load float, float* [[SFVAR9]], align 4 -// CHECK1-NEXT: store float [[TMP22]], float* [[CONV3]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load double, double* [[G2]], align 8 +// CHECK1-NEXT: store volatile double [[TMP27]], double* [[G]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load double*, double** [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load double, double* [[TMP28]], align 8 +// CHECK1-NEXT: store volatile double [[TMP29]], double* [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[SVAR5]], align 4 +// CHECK1-NEXT: store i32 [[TMP30]], i32* [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load float, float* [[SFVAR6]], align 4 +// CHECK1-NEXT: store float [[TMP31]], float* [[SFVAR]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -339,8 +339,7 @@ // CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 // CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], i32* [[SVAR_ADDR]], align 4 @@ -349,27 +348,29 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[SFVAR_ADDR]] to float* // CHECK3-NEXT: store double* [[TMP1]], double** [[TMP]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load float, float* [[CONV]], align 4 -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[SFVAR_CASTED]] to float* -// CHECK3-NEXT: store float [[TMP5]], float* [[CONV1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[SFVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, double*, double*, i32, i32)* @.omp_outlined. to void (i32*, i32*, ...)*), double* [[TMP0]], double* [[TMP2]], i32 [[TMP4]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[TMP0]], double** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP]], align 4 +// CHECK3-NEXT: store double* [[TMP4]], double** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[SVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float, float* [[CONV]], align 4 +// CHECK3-NEXT: store float [[TMP8]], float* [[TMP7]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], i32 noundef [[SVAR:%.*]], i32 noundef [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -377,96 +378,101 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[SVAR3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR4:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 -// CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SVAR]], i32* [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SFVAR]], i32* [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[SFVAR_ADDR]] to float* -// CHECK3-NEXT: store double* [[TMP1]], double** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float, float* [[TMP7]], align 4 +// CHECK3-NEXT: store float [[TMP8]], float* [[SFVAR]], align 4 +// CHECK3-NEXT: store double* [[TMP4]], double** [[TMP]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP]], align 4 -// CHECK3-NEXT: store double* [[G13]], double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load double*, double** [[TMP]], align 4 +// CHECK3-NEXT: store double* [[G1]], double** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: store double 1.000000e+00, double* [[G2]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP11]], align 4 -// CHECK3-NEXT: store i32 3, i32* [[SVAR5]], align 4 -// CHECK3-NEXT: store float 4.000000e+00, float* [[SFVAR6]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double* [[G2]], double** [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP14:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: store double* [[TMP14]], double** [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store i32* [[SVAR5]], i32** [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store float* [[SFVAR6]], float** [[TMP16]], align 4 +// CHECK3-NEXT: store double 1.000000e+00, double* [[G]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load double*, double** [[_TMP2]], align 4 +// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP18]], align 4 +// CHECK3-NEXT: store i32 3, i32* [[SVAR3]], align 4 +// CHECK3-NEXT: store float 4.000000e+00, float* [[SFVAR4]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G]], double** [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load double*, double** [[_TMP2]], align 4 +// CHECK3-NEXT: store double* [[TMP21]], double** [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SVAR3]], i32** [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[SFVAR4]], float** [[TMP23]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP20:%.*]] = load double, double* [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP20]], double* [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP21:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load double, double* [[TMP21]], align 4 -// CHECK3-NEXT: store volatile double [[TMP22]], double* [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP23]], i32* [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load float, float* [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP24]], float* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load double, double* [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP27]], double* [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP28:%.*]] = load double*, double** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load double, double* [[TMP28]], align 4 +// CHECK3-NEXT: store volatile double [[TMP29]], double* [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[SVAR3]], align 4 +// CHECK3-NEXT: store i32 [[TMP30]], i32* [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load float, float* [[SFVAR4]], align 4 +// CHECK3-NEXT: store float [[TMP31]], float* [[SFVAR]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -619,8 +625,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 @@ -632,61 +637,67 @@ // CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK9-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[CONV3]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i64 [[TMP4]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP5]], i64 [[TMP7]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca %struct.S*, align 8 +// CHECK9-NEXT: [[SVAR4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SVAR]], i64* [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* -// CHECK9-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 8 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[SVAR]], align 4 +// CHECK9-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -696,108 +707,108 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store %struct.S* [[VAR6]], %struct.S** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK9-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP12]], i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK9-NEXT: [[TMP16:%.*]] = bitcast %struct.S* [[ARRAYIDX11]] to i8* -// CHECK9-NEXT: [[TMP17:%.*]] = bitcast %struct.S* [[TMP14]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP16]], i8* align 4 [[TMP17]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[T_VAR2]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP20]], i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK9-NEXT: [[TMP24:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* +// CHECK9-NEXT: [[TMP25:%.*]] = bitcast %struct.S* [[TMP22]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP24]], i8* align 4 [[TMP25]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK9-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK9-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP24]], i8* align 4 [[TMP25]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP1]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP26:%.*]] = bitcast [2 x %struct.S]* [[S_ARR5]] to %struct.S* -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN13]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN13]], [[TMP27]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[T_VAR2]], align 4 +// CHECK9-NEXT: store i32 [[TMP31]], i32* [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK9-NEXT: [[TMP33:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP34:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN9]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN9]], [[TMP35]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP26]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN13]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[TMP28:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK9-NEXT: [[TMP29:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP34]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[TMP36:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK9-NEXT: [[TMP37:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP36]], i8* align 4 [[TMP37]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done14: -// CHECK9-NEXT: [[TMP30:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP31:%.*]] = bitcast %struct.S* [[TMP3]] to i8* -// CHECK9-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[TMP30]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP31]], i8* align 4 [[TMP32]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK9-NEXT: store i32 [[TMP33]], i32* [[CONV1]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP35]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done10: +// CHECK9-NEXT: [[TMP38:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = bitcast %struct.S* [[TMP11]] to i8* +// CHECK9-NEXT: [[TMP40:%.*]] = bitcast %struct.S* [[TMP38]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP39]], i8* align 4 [[TMP40]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, i32* [[SVAR4]], align 4 +// CHECK9-NEXT: store i32 [[TMP41]], i32* [[SVAR]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN15:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN15]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN11]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP34]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP42]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN15]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE16:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done16: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done12: // CHECK9-NEXT: ret void // // @@ -965,7 +976,7 @@ // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 @@ -975,24 +986,27 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK9-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i64 [[TMP4]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP5]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [2 x %struct.S.0]* [[TMP1]], [2 x %struct.S.0]** [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP7]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1001,27 +1015,30 @@ // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca %struct.S.0*, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP7]], align 8 +// CHECK9-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -1031,106 +1048,106 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK9-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK9-NEXT: [[TMP9:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP12]], i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i64 0, i64 [[IDXPROM8]] -// CHECK9-NEXT: [[TMP16:%.*]] = bitcast %struct.S.0* [[ARRAYIDX9]] to i8* -// CHECK9-NEXT: [[TMP17:%.*]] = bitcast %struct.S.0* [[TMP14]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP16]], i8* align 4 [[TMP17]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR2]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX6]] to i8* +// CHECK9-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK9-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK9-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP24]], i8* align 4 [[TMP25]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP1]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP26:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR4]] to %struct.S.0* -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN11]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN11]], [[TMP27]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[T_VAR2]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], i32* [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK9-NEXT: [[TMP31:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP32:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR]] to %struct.S.0* +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN8]], [[TMP33]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP26]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[TMP28:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK9-NEXT: [[TMP29:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP32]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[TMP34:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK9-NEXT: [[TMP35:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done12: -// CHECK9-NEXT: [[TMP30:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP31:%.*]] = bitcast %struct.S.0* [[TMP3]] to i8* -// CHECK9-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[TMP30]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP31]], i8* align 4 [[TMP32]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP33]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP36:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP37:%.*]] = bitcast %struct.S.0* [[TMP9]] to i8* +// CHECK9-NEXT: [[TMP38:%.*]] = bitcast %struct.S.0* [[TMP36]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP37]], i8* align 4 [[TMP38]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN13]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP33]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP39]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done14: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -1323,8 +1340,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 -// CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 @@ -1334,27 +1350,31 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 // CHECK11-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[SVAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32 [[TMP4]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP5]], i32 [[TMP7]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[SVAR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1363,28 +1383,34 @@ // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca %struct.S*, align 4 -// CHECK11-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca %struct.S*, align 4 +// CHECK11-NEXT: [[SVAR4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[SVAR]], i32* [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[SVAR]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1394,106 +1420,106 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store %struct.S* [[VAR5]], %struct.S** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK11-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC3]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: store i32 [[TMP12]], i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 [[TMP15]] -// CHECK11-NEXT: [[TMP16:%.*]] = bitcast %struct.S* [[ARRAYIDX9]] to i8* -// CHECK11-NEXT: [[TMP17:%.*]] = bitcast %struct.S* [[TMP14]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP16]], i8* align 4 [[TMP17]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[T_VAR2]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP21]] +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP23]] +// CHECK11-NEXT: [[TMP24:%.*]] = bitcast %struct.S* [[ARRAYIDX6]] to i8* +// CHECK11-NEXT: [[TMP25:%.*]] = bitcast %struct.S* [[TMP22]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP24]], i8* align 4 [[TMP25]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK11-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK11-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP23]], i32* [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK11-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP24]], i8* align 4 [[TMP25]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP1]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP26:%.*]] = bitcast [2 x %struct.S]* [[S_ARR4]] to %struct.S* -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN11]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN11]], [[TMP27]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[T_VAR2]], align 4 +// CHECK11-NEXT: store i32 [[TMP31]], i32* [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK11-NEXT: [[TMP33:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP34:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN8]], [[TMP35]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP26]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[TMP28:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK11-NEXT: [[TMP29:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP34]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[TMP36:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK11-NEXT: [[TMP37:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 [[TMP37]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done12: -// CHECK11-NEXT: [[TMP30:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = bitcast %struct.S* [[TMP3]] to i8* -// CHECK11-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[TMP30]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP31]], i8* align 4 [[TMP32]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[SVAR7]], align 4 -// CHECK11-NEXT: store i32 [[TMP33]], i32* [[SVAR_ADDR]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP35]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done9: +// CHECK11-NEXT: [[TMP38:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = bitcast %struct.S* [[TMP11]] to i8* +// CHECK11-NEXT: [[TMP40:%.*]] = bitcast %struct.S* [[TMP38]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP39]], i8* align 4 [[TMP40]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[SVAR4]], align 4 +// CHECK11-NEXT: store i32 [[TMP41]], i32* [[SVAR]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP34]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP42]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done11: // CHECK11-NEXT: ret void // // @@ -1660,7 +1686,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 @@ -1669,23 +1695,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 // CHECK11-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32 [[TMP4]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP5]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x %struct.S.0]* [[TMP1]], [2 x %struct.S.0]** [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP7]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1694,26 +1724,30 @@ // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca %struct.S.0*, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP7]], align 4 +// CHECK11-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1723,104 +1757,104 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK11-NEXT: [[TMP9:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC3]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: store i32 [[TMP12]], i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 [[TMP15]] -// CHECK11-NEXT: [[TMP16:%.*]] = bitcast %struct.S.0* [[ARRAYIDX8]] to i8* -// CHECK11-NEXT: [[TMP17:%.*]] = bitcast %struct.S.0* [[TMP14]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP16]], i8* align 4 [[TMP17]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR2]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP19]] +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP21]] +// CHECK11-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX5]] to i8* +// CHECK11-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP23]], i32* [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK11-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP24]], i8* align 4 [[TMP25]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP1]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP26:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR4]] to %struct.S.0* -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN10]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN10]], [[TMP27]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[T_VAR2]], align 4 +// CHECK11-NEXT: store i32 [[TMP29]], i32* [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK11-NEXT: [[TMP31:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP32:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR]] to %struct.S.0* +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN7]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN7]], [[TMP33]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP26]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[TMP28:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK11-NEXT: [[TMP29:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP32]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[TMP34:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK11-NEXT: [[TMP35:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done11: -// CHECK11-NEXT: [[TMP30:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = bitcast %struct.S.0* [[TMP3]] to i8* -// CHECK11-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[TMP30]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP31]], i8* align 4 [[TMP32]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP33]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done8: +// CHECK11-NEXT: [[TMP36:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = bitcast %struct.S.0* [[TMP9]] to i8* +// CHECK11-NEXT: [[TMP38:%.*]] = bitcast %struct.S.0* [[TMP36]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP37]], i8* align 4 [[TMP38]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN9]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP33]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP39]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done10: // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp @@ -126,7 +126,7 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 @@ -139,21 +139,22 @@ // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, [1000 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP5]], [1000 x i32]* [[TMP1]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store [1000 x i32]* [[TMP1]], [1000 x i32]** [[TMP6]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -163,92 +164,106 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], [1000 x i32]* [[TMP0]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i64 [[TMP19]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP23]], i64* [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP25]], i64* [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store [1000 x i32]* [[TMP4]], [1000 x i32]** [[TMP28]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -258,76 +273,83 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP8]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB3]], i32 [[TMP19]], i32 2) -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32 2) +// CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK1-NEXT: br i1 [[TMP29]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: // CHECK1-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK1: .cancel.continue: @@ -335,21 +357,21 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: cancel.exit: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) +// CHECK1-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) // CHECK1-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK1: omp.precond.end: // CHECK1-NEXT: br label [[CANCEL_CONT]] @@ -363,29 +385,31 @@ // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 // CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 // CHECK1-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[G_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, [1000 x i32]*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP2]], [1000 x i32]* [[TMP0]], i32* [[TMP3]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store [1000 x i32]* [[TMP0]], [1000 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[G_ADDR]], align 8 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[TMP4]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -395,95 +419,110 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32*, i32** [[G_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], [1000 x i32]* [[TMP0]], i32* [[TMP20]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK1-NEXT: store i64 [[TMP20]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK1-NEXT: store i64 [[TMP22]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP24]], i64* [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP26]], i64* [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP28]], i32* [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store [1000 x i32]* [[TMP4]], [1000 x i32]** [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: store i32* [[TMP31]], i32** [[TMP30]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK1-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -493,89 +532,96 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i64 0 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX8]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 0 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP8]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP27]], i32* [[ARRAYIDX7]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK1-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP31]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -744,7 +790,7 @@ // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 // CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) // CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 @@ -757,21 +803,22 @@ // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, [1000 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP5]], [1000 x i32]* [[TMP1]]) +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store [1000 x i32]* [[TMP1]], [1000 x i32]** [[TMP6]], align 8 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -781,92 +828,106 @@ // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], [1000 x i32]* [[TMP0]]) +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK2-NEXT: store i64 [[TMP19]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK2-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP23]], i64* [[TMP22]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: store i64 [[TMP25]], i64* [[TMP24]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 8 +// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store [1000 x i32]* [[TMP4]], [1000 x i32]** [[TMP28]], align 8 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK2-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -876,76 +937,83 @@ // CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK2-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK2-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK2-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP7]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK2-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK2-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP8]], i64 0, i64 [[IDXPROM]] // CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB3]], i32 [[TMP19]], i32 2) -// CHECK2-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK2-NEXT: br i1 [[TMP21]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK2-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32 2) +// CHECK2-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK2-NEXT: br i1 [[TMP29]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK2: .cancel.exit: // CHECK2-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK2: .cancel.continue: @@ -953,21 +1021,21 @@ // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK2-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: cancel.exit: -// CHECK2-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) +// CHECK2-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) // CHECK2-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK2: omp.precond.end: // CHECK2-NEXT: br label [[CANCEL_CONT]] @@ -981,29 +1049,31 @@ // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 // CHECK2-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 // CHECK2-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[G_ADDR]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, [1000 x i32]*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP2]], [1000 x i32]* [[TMP0]], i32* [[TMP3]]) +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store [1000 x i32]* [[TMP0]], [1000 x i32]** [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[G_ADDR]], align 8 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[TMP4]], align 8 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK2-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1013,95 +1083,110 @@ // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32*, i32** [[G_ADDR]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], [1000 x i32]* [[TMP0]], i32* [[TMP20]]) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK2-NEXT: store i64 [[TMP20]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK2-NEXT: store i64 [[TMP22]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP24]], i64* [[TMP23]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: store i64 [[TMP26]], i64* [[TMP25]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP28]], i32* [[TMP27]], align 8 +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store [1000 x i32]* [[TMP4]], [1000 x i32]** [[TMP29]], align 8 +// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK2-NEXT: store i32* [[TMP31]], i32** [[TMP30]], align 8 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK2-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK2-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1111,89 +1196,96 @@ // CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK2-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK2-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK2-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP7]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK2-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK2-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 8 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i64 0 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX8]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 0 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP8]], i64 0, i64 [[IDXPROM]] +// CHECK2-NEXT: store i32 [[TMP27]], i32* [[ARRAYIDX7]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK2-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP31]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void @@ -1365,7 +1457,7 @@ // CHECK4-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) // CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 @@ -1375,20 +1467,22 @@ // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK4-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP4]], i32* [[N_CASTED]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [1000 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32 [[TMP5]], [1000 x i32]* [[TMP1]]) +// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: store [1000 x i32]* [[TMP1]], [1000 x i32]** [[TMP6]], align 4 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK4-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1398,88 +1492,104 @@ // CHECK4-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK4-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP3]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[N]], align 4 +// CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK4-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK4: omp.precond.then: // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK4-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], [1000 x i32]* [[TMP0]]) +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP18]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP21]], i32* [[TMP20]], align 4 +// CHECK4-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK4-NEXT: store i32 [[TMP23]], i32* [[TMP22]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[N]], align 4 +// CHECK4-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK4-NEXT: store [1000 x i32]* [[TMP4]], [1000 x i32]** [[TMP26]], align 4 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) +// CHECK4-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: omp.precond.end: // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK4-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1492,69 +1602,77 @@ // CHECK4-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK4-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK4-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP7]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK4-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK4-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK4: omp.precond.then: // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK4-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK4-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] // CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP17]] +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP8]], i32 0, i32 [[TMP25]] // CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB3]], i32 [[TMP19]], i32 2) -// CHECK4-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK4-NEXT: br i1 [[TMP21]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK4-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK4-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32 2) +// CHECK4-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK4-NEXT: br i1 [[TMP29]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK4: .cancel.exit: // CHECK4-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK4: .cancel.continue: @@ -1562,21 +1680,21 @@ // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK4-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK4-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: cancel.exit: -// CHECK4-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) +// CHECK4-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) // CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK4: omp.precond.end: // CHECK4-NEXT: br label [[CANCEL_CONT]] @@ -1590,27 +1708,30 @@ // CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 // CHECK4-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 // CHECK4-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP1]], i32* [[N_CASTED]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32*, i32** [[G_ADDR]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [1000 x i32]*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32 [[TMP2]], [1000 x i32]* [[TMP0]], i32* [[TMP3]]) +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: store [1000 x i32]* [[TMP0]], [1000 x i32]** [[TMP3]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[G_ADDR]], align 4 +// CHECK4-NEXT: store i32* [[TMP5]], i32** [[TMP4]], align 4 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK4-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK4-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1620,91 +1741,108 @@ // CHECK4-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK4-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK4-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP3]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[N]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK4-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK4: omp.precond.then: // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK4-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32*, i32** [[G_ADDR]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], [1000 x i32]* [[TMP0]], i32* [[TMP18]]) +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK4-NEXT: store i32 [[TMP24]], i32* [[TMP23]], align 4 +// CHECK4-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[N]], align 4 +// CHECK4-NEXT: store i32 [[TMP26]], i32* [[TMP25]], align 4 +// CHECK4-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK4-NEXT: store [1000 x i32]* [[TMP4]], [1000 x i32]** [[TMP27]], align 4 +// CHECK4-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK4-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK4-NEXT: store i32* [[TMP29]], i32** [[TMP28]], align 4 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK4-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: omp.precond.end: // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK4-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK4-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1717,82 +1855,90 @@ // CHECK4-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK4-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK4-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK4-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP7]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[N]], align 4 +// CHECK4-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK4-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK4: omp.precond.then: // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK4-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK4-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 4 -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 0 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK4-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] -// CHECK4-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX6]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i32 0 +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK4-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP8]], i32 0, i32 [[TMP28]] +// CHECK4-NEXT: store i32 [[TMP27]], i32* [[ARRAYIDX6]], align 4 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK4-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK4-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP31]]) // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: omp.precond.end: // CHECK4-NEXT: ret void @@ -1812,7 +1958,7 @@ // CHECK10-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 // CHECK10-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK10-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK10-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK10-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) // CHECK10-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK10-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 @@ -1825,21 +1971,22 @@ // CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK10-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, [1000 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP5]], [1000 x i32]* [[TMP1]]) +// CHECK10-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK10-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK10-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: store [1000 x i32]* [[TMP1]], [1000 x i32]** [[TMP6]], align 8 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { +// CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK10-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK10-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK10-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1849,92 +1996,106 @@ // CHECK10-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK10-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK10-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK10-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK10-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK10-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK10-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK10-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP3]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[N]], align 4 +// CHECK10-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK10-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK10-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK10-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK10-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK10-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK10: omp.precond.then: // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK10-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK10-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK10-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK10-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK10-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 -// CHECK10-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], [1000 x i32]* [[TMP0]]) +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK10-NEXT: store i64 [[TMP19]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK10-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK10-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK10-NEXT: store i64 [[TMP23]], i64* [[TMP22]], align 8 +// CHECK10-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK10-NEXT: store i64 [[TMP25]], i64* [[TMP24]], align 8 +// CHECK10-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[N]], align 4 +// CHECK10-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 8 +// CHECK10-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK10-NEXT: store [1000 x i32]* [[TMP4]], [1000 x i32]** [[TMP28]], align 8 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK10-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) // CHECK10-NEXT: br label [[OMP_PRECOND_END]] // CHECK10: omp.precond.end: // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { +// CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK10-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK10-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK10-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK10-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1944,76 +2105,83 @@ // CHECK10-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK10-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK10-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK10-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK10-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK10-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK10-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK10-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK10-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK10-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK10-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP8:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP7]], align 8 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK10-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK10-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK10-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK10-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK10-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK10-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK10: omp.precond.then: // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK10-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK10-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK10-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK10-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK10-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK10-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK10-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK10-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK10-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK10-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] +// CHECK10-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK10-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK10-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] +// CHECK10-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP8]], i64 0, i64 [[IDXPROM]] // CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK10-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB3]], i32 [[TMP19]], i32 2) -// CHECK10-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK10-NEXT: br i1 [[TMP21]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK10-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK10-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32 2) +// CHECK10-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK10-NEXT: br i1 [[TMP29]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK10: .cancel.exit: // CHECK10-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK10: .cancel.continue: @@ -2021,21 +2189,21 @@ // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK10-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK10-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK10-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) // CHECK10-NEXT: br label [[OMP_PRECOND_END]] // CHECK10: cancel.exit: -// CHECK10-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) +// CHECK10-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) // CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK10: omp.precond.end: // CHECK10-NEXT: br label [[CANCEL_CONT]] @@ -2049,29 +2217,31 @@ // CHECK10-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK10-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 // CHECK10-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 -// CHECK10-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK10-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK10-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 // CHECK10-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32*, i32** [[G_ADDR]], align 8 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, [1000 x i32]*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP2]], [1000 x i32]* [[TMP0]], i32* [[TMP3]]) +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK10-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: store [1000 x i32]* [[TMP0]], [1000 x i32]** [[TMP3]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32*, i32** [[G_ADDR]], align 8 +// CHECK10-NEXT: store i32* [[TMP5]], i32** [[TMP4]], align 8 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR0]] { +// CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK10-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK10-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK10-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK10-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2081,95 +2251,110 @@ // CHECK10-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK10-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK10-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK10-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK10-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 -// CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK10-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK10-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK10-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK10-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP3]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[N]], align 4 +// CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK10-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK10-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK10-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK10-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK10-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK10: omp.precond.then: // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK10-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK10-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK10-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK10-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK10-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 -// CHECK10-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32*, i32** [[G_ADDR]], align 8 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], [1000 x i32]* [[TMP0]], i32* [[TMP20]]) +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK10-NEXT: store i64 [[TMP20]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK10-NEXT: store i64 [[TMP22]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK10-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK10-NEXT: store i64 [[TMP24]], i64* [[TMP23]], align 8 +// CHECK10-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK10-NEXT: store i64 [[TMP26]], i64* [[TMP25]], align 8 +// CHECK10-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[N]], align 4 +// CHECK10-NEXT: store i32 [[TMP28]], i32* [[TMP27]], align 8 +// CHECK10-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK10-NEXT: store [1000 x i32]* [[TMP4]], [1000 x i32]** [[TMP29]], align 8 +// CHECK10-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK10-NEXT: [[TMP31:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK10-NEXT: store i32* [[TMP31]], i32** [[TMP30]], align 8 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK10-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) // CHECK10-NEXT: br label [[OMP_PRECOND_END]] // CHECK10: omp.precond.end: // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR0]] { +// CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK10-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK10-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK10-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK10-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK10-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2179,89 +2364,96 @@ // CHECK10-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK10-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK10-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK10-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK10-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK10-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 -// CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK10-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK10-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK10-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK10-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK10-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK10-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP8:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP7]], align 8 +// CHECK10-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[N]], align 4 +// CHECK10-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK10-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK10-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK10-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK10-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] // CHECK10-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK10: omp.precond.then: // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK10-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK10-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK10-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK10-NEXT: [[CONV:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK10-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK10-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK10-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK10-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK10-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK10-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +// CHECK10-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK10-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK10-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK10-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 8 -// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i64 0 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK10-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX8]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 0 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK10-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP8]], i64 0, i64 [[IDXPROM]] +// CHECK10-NEXT: store i32 [[TMP27]], i32* [[ARRAYIDX7]], align 4 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK10-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK10-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK10-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP31]]) // CHECK10-NEXT: br label [[OMP_PRECOND_END]] // CHECK10: omp.precond.end: // CHECK10-NEXT: ret void @@ -2274,7 +2466,7 @@ // CHECK12-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 // CHECK12-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK12-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) // CHECK12-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK12-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 @@ -2284,20 +2476,22 @@ // CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK12-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP4]], i32* [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [1000 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32 [[TMP5]], [1000 x i32]* [[TMP1]]) +// CHECK12-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK12-NEXT: store [1000 x i32]* [[TMP1]], [1000 x i32]** [[TMP6]], align 4 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK12-NEXT: ret void // // // CHECK12-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { +// CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK12-NEXT: entry: // CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK12-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK12-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2307,88 +2501,104 @@ // CHECK12-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK12-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK12-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP4:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP3]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[N]], align 4 +// CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK12-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK12-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK12-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK12-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK12: omp.precond.then: // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK12-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK12-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK12: cond.true: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK12-NEXT: br label [[COND_END:%.*]] // CHECK12: cond.false: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK12-NEXT: br label [[COND_END]] // CHECK12: cond.end: -// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK12-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], [1000 x i32]* [[TMP0]]) +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP18]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK12-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP21]], i32* [[TMP20]], align 4 +// CHECK12-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK12-NEXT: store i32 [[TMP23]], i32* [[TMP22]], align 4 +// CHECK12-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[N]], align 4 +// CHECK12-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4 +// CHECK12-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK12-NEXT: store [1000 x i32]* [[TMP4]], [1000 x i32]** [[TMP26]], align 4 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: -// CHECK12-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) +// CHECK12-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK12-NEXT: br label [[OMP_PRECOND_END]] // CHECK12: omp.precond.end: // CHECK12-NEXT: ret void // // // CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { +// CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK12-NEXT: entry: // CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK12-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK12-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK12-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2401,69 +2611,77 @@ // CHECK12-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK12-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK12-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK12-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK12-NEXT: [[TMP8:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP7]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK12-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK12-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK12-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK12-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK12-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK12: omp.precond.then: // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK12-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK12-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] // CHECK12-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK12: cond.true: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK12-NEXT: br label [[COND_END:%.*]] // CHECK12: cond.false: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: br label [[COND_END]] // CHECK12: cond.end: -// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK12-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP17]] +// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP8]], i32 0, i32 [[TMP25]] // CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB3]], i32 [[TMP19]], i32 2) -// CHECK12-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK12-NEXT: br i1 [[TMP21]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK12-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK12-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32 2) +// CHECK12-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK12-NEXT: br i1 [[TMP29]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK12: .cancel.exit: // CHECK12-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK12: .cancel.continue: @@ -2471,21 +2689,21 @@ // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK12-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK12-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: -// CHECK12-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK12-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) // CHECK12-NEXT: br label [[OMP_PRECOND_END]] // CHECK12: cancel.exit: -// CHECK12-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) +// CHECK12-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) // CHECK12-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK12: omp.precond.end: // CHECK12-NEXT: br label [[CANCEL_CONT]] @@ -2499,27 +2717,30 @@ // CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 // CHECK12-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 4 -// CHECK12-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK12-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK12-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 // CHECK12-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 4 // CHECK12-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP1]], i32* [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32*, i32** [[G_ADDR]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [1000 x i32]*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32 [[TMP2]], [1000 x i32]* [[TMP0]], i32* [[TMP3]]) +// CHECK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK12-NEXT: store [1000 x i32]* [[TMP0]], [1000 x i32]** [[TMP3]], align 4 +// CHECK12-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32*, i32** [[G_ADDR]], align 4 +// CHECK12-NEXT: store i32* [[TMP5]], i32** [[TMP4]], align 4 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK12-NEXT: ret void // // // CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR0]] { +// CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK12-NEXT: entry: // CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK12-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 4 +// CHECK12-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK12-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2529,91 +2750,108 @@ // CHECK12-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK12-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 4 -// CHECK12-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK12-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK12-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP4:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP3]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[N]], align 4 +// CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK12-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK12-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK12-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK12-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK12: omp.precond.then: // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK12-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK12-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK12: cond.true: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK12-NEXT: br label [[COND_END:%.*]] // CHECK12: cond.false: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK12-NEXT: br label [[COND_END]] // CHECK12: cond.end: -// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK12-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32*, i32** [[G_ADDR]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], [1000 x i32]* [[TMP0]], i32* [[TMP18]]) +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK12-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 4 +// CHECK12-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK12-NEXT: store i32 [[TMP24]], i32* [[TMP23]], align 4 +// CHECK12-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[N]], align 4 +// CHECK12-NEXT: store i32 [[TMP26]], i32* [[TMP25]], align 4 +// CHECK12-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK12-NEXT: store [1000 x i32]* [[TMP4]], [1000 x i32]** [[TMP27]], align 4 +// CHECK12-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK12-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK12-NEXT: store i32* [[TMP29]], i32** [[TMP28]], align 4 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK12-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: -// CHECK12-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK12-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) // CHECK12-NEXT: br label [[OMP_PRECOND_END]] // CHECK12: omp.precond.end: // CHECK12-NEXT: ret void // // // CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR0]] { +// CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK12-NEXT: entry: // CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK12-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK12-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 4 +// CHECK12-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK12-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2626,82 +2864,90 @@ // CHECK12-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK12-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 4 -// CHECK12-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK12-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK12-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK12-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK12-NEXT: [[TMP8:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP7]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[N]], align 4 +// CHECK12-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK12-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK12-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK12-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] // CHECK12-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK12: omp.precond.then: // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK12-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK12-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK12-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK12: cond.true: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK12-NEXT: br label [[COND_END:%.*]] // CHECK12: cond.false: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: br label [[COND_END]] // CHECK12: cond.end: -// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK12-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 4 -// CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 0 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK12-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] -// CHECK12-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX6]], align 4 +// CHECK12-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i32 0 +// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP8]], i32 0, i32 [[TMP28]] +// CHECK12-NEXT: store i32 [[TMP27]], i32* [[ARRAYIDX6]], align 4 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK12-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: -// CHECK12-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK12-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP31]]) // CHECK12-NEXT: br label [[OMP_PRECOND_END]] // CHECK12: omp.precond.end: // CHECK12-NEXT: ret void diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp @@ -148,18 +148,21 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -167,66 +170,81 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 56087, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP18]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -238,76 +256,82 @@ // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 56087, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 456 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 456 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP18]], 456 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 456 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL5]] // CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] // CHECK1-NEXT: store i32 [[ADD7]], i32* [[J]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], [123 x [456 x i32]]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[J]], align 4 +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP20]] to i64 // CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [456 x i32], [456 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX9]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -367,18 +391,21 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -386,64 +413,79 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 56087, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -455,72 +497,78 @@ // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 56087, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 456 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP18]], 456 // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL4]] // CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK3-NEXT: store i32 [[ADD6]], i32* [[J]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], [123 x [456 x i32]]* [[A]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], [456 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP14]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], [123 x [456 x i32]]* [[A]], i32 0, i32 [[TMP19]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[J]], align 4 +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], [456 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP20]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX7]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -667,8 +715,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[M]], i64* [[M_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -679,291 +726,316 @@ // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 4 -// CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[M_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[M_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP1]], i64* [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32* [[TMP2]], i32** [[TMP9]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I13:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J14:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[M]], i64* [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[M_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[M]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 -// CHECK9-NEXT: [[CONV10:%.*]] = sext i32 [[DIV9]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV7]], [[CONV10]] -// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB11]], i64* [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 // CHECK9-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[CMP12:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: store i64 [[TMP17]], i64* [[DOTOMP_COMB_UB]], align 8 // CHECK9-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK9-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP16]], i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP24]], i64* [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] -// CHECK9-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV17:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP21]], i32* [[CONV17]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV3]], align 4 -// CHECK9-NEXT: [[CONV18:%.*]] = bitcast i64* [[M_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP23]], i32* [[CONV18]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[M_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP19]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP27]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP28]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP30]], i64* [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP32]], i64* [[TMP31]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP34]], i32* [[TMP33]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP36]], i32* [[TMP35]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i64 [[TMP6]], i64* [[TMP37]], align 8 +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store i64 [[TMP8]], i64* [[TMP38]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: store i32* [[TMP10]], i32** [[TMP39]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP40:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP41:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP40]], [[TMP41]] // CHECK9-NEXT: store i64 [[ADD]], i64* [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) +// CHECK9-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP43]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I13:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J14:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[M]], i64* [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[M_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: store i32 [[TMP8]], i32* [[M]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32*, i32** [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 -// CHECK9-NEXT: [[CONV10:%.*]] = sext i32 [[DIV9]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV7]], [[CONV10]] -// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB11]], i64* [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP18]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 // CHECK9-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP19]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[CMP12:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP20]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[TMP10]], i64* [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: store i64 [[TMP21]], i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP22]], i64* [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP23]], i64* [[DOTOMP_UB]], align 8 // CHECK9-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP13]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK9-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP28]], [[COND_TRUE]] ], [ [[TMP29]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP18]], i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP30]], i64* [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK9-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP31]], [[TMP32]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP22]], 0 -// CHECK9-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 -// CHECK9-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] -// CHECK9-NEXT: [[CONV20:%.*]] = sext i32 [[MUL19]] to i64 -// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i64 [[TMP21]], [[CONV20]] -// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]] -// CHECK9-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK9-NEXT: store i32 [[CONV23]], i32* [[I13]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK9-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 -// CHECK9-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] -// CHECK9-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 -// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP24]], [[CONV27]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP26]], 0 -// CHECK9-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 -// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] -// CHECK9-NEXT: [[CONV32:%.*]] = sext i32 [[MUL31]] to i64 -// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[DIV28]], [[CONV32]] -// CHECK9-NEXT: [[SUB34:%.*]] = sub nsw i64 [[TMP23]], [[MUL33]] -// CHECK9-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 -// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] -// CHECK9-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 -// CHECK9-NEXT: store i32 [[CONV37]], i32* [[J14]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I13]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[TMP28:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP1]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[TMP28]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[J14]], align 4 -// CHECK9-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK9-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i64 [[IDXPROM38]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX39]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK9-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK9-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP33]], [[CONV16]] +// CHECK9-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK9-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK9-NEXT: store i32 [[CONV19]], i32* [[I9]], align 4 +// CHECK9-NEXT: [[TMP35:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK9-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK9-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP36]], [[CONV23]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK9-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] +// CHECK9-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK9-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP35]], [[MUL29]] +// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 +// CHECK9-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] +// CHECK9-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK9-NEXT: store i32 [[CONV33]], i32* [[J10]], align 4 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[I9]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP39]] to i64 +// CHECK9-NEXT: [[TMP40:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP12]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP14]], i64 [[TMP40]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, i32* [[J10]], align 4 +// CHECK9-NEXT: [[IDXPROM34:%.*]] = sext i32 [[TMP41]] to i64 +// CHECK9-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i64 [[IDXPROM34]] +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX35]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[ADD40:%.*]] = add nsw i64 [[TMP30]], 1 -// CHECK9-NEXT: store i64 [[ADD40]], i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 [[TMP42]], 1 +// CHECK9-NEXT: store i64 [[ADD36]], i64* [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP44]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1005,18 +1077,21 @@ // CHECK9-SAME: ([10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x [2 x i32]]*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [10 x [2 x i32]]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [10 x [2 x i32]]* [[TMP0]], [10 x [2 x i32]]** [[TMP1]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1024,66 +1099,81 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 19, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x [2 x i32]]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x [2 x i32]]* [[TMP0]]) +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [10 x [2 x i32]]* [[TMP2]], [10 x [2 x i32]]** [[TMP18]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1095,75 +1185,81 @@ // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 19, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 2 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 2 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP18]], 2 // CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 2 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL5]] // CHECK9-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] // CHECK9-NEXT: store i32 [[ADD7]], i32* [[J]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4 -// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[J]], align 4 +// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP20]] to i64 // CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX9]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK9-NEXT: ret void // // @@ -1307,8 +1403,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -1317,285 +1412,318 @@ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[M_CASTED]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[M_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[M_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], i32* [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32* [[TMP2]], i32** [[TMP9]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J12:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J10:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[M]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 // CHECK11-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK11-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP17]], i64* [[DOTOMP_COMB_UB]], align 8 // CHECK11-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP16]], i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP24]], i64* [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] -// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK11-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i32 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP23]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP25]], i32* [[M_CASTED]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[M_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP20]], i32 [[TMP22]], i32 [[TMP24]], i32 [[TMP26]], i32 [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK11-NEXT: [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32 +// CHECK11-NEXT: store i32 [[TMP28]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = trunc i64 [[TMP29]] to i32 +// CHECK11-NEXT: store i32 [[TMP30]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP32]], i32* [[TMP31]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP34]], i32* [[TMP33]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP36]], i32* [[TMP35]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP38]], i32* [[TMP37]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[TMP39]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[TMP40]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store i32* [[TMP10]], i32** [[TMP41]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP27]], [[TMP28]] +// CHECK11-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP43:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP42]], [[TMP43]] // CHECK11-NEXT: store i64 [[ADD]], i64* [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP45]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I13:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J14:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J12:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[M]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP18]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 // CHECK11-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP19]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP20]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[CONV11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CONV12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK11-NEXT: store i64 [[CONV11]], i64* [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[CONV12]], i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP21]], i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[CONV9:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[CONV10:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK11-NEXT: store i64 [[CONV9]], i64* [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[CONV10]], i64* [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP13]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// CHECK11-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] +// CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP28]], [[COND_TRUE]] ], [ [[TMP29]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP18]], i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP30]], i64* [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK11-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP31]], [[TMP32]] +// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP22]], 0 -// CHECK11-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 -// CHECK11-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] -// CHECK11-NEXT: [[CONV20:%.*]] = sext i32 [[MUL19]] to i64 -// CHECK11-NEXT: [[DIV21:%.*]] = sdiv i64 [[TMP21]], [[CONV20]] -// CHECK11-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]] -// CHECK11-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK11-NEXT: store i32 [[CONV23]], i32* [[I13]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK11-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 -// CHECK11-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] -// CHECK11-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 -// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP24]], [[CONV27]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP26]], 0 -// CHECK11-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 -// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] -// CHECK11-NEXT: [[CONV32:%.*]] = sext i32 [[MUL31]] to i64 -// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[DIV28]], [[CONV32]] -// CHECK11-NEXT: [[SUB34:%.*]] = sub nsw i64 [[TMP23]], [[MUL33]] -// CHECK11-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 -// CHECK11-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] -// CHECK11-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 -// CHECK11-NEXT: store i32 [[CONV37]], i32* [[J14]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I13]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = mul nsw i32 [[TMP27]], [[TMP1]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP28]] -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[J14]], align 4 -// CHECK11-NEXT: [[ARRAYIDX38:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i32 [[TMP29]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX38]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK11-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] +// CHECK11-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 +// CHECK11-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP33]], [[CONV18]] +// CHECK11-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] +// CHECK11-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK11-NEXT: store i32 [[CONV21]], i32* [[I11]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK11-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 +// CHECK11-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] +// CHECK11-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 +// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP36]], [[CONV25]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 +// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] +// CHECK11-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 +// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] +// CHECK11-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP35]], [[MUL31]] +// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 +// CHECK11-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] +// CHECK11-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 +// CHECK11-NEXT: store i32 [[CONV35]], i32* [[J12]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[I11]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = mul nsw i32 [[TMP39]], [[TMP12]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP14]], i32 [[TMP40]] +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[J12]], align 4 +// CHECK11-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i32 [[TMP41]] +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX36]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP30]], 1 -// CHECK11-NEXT: store i64 [[ADD39]], i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[ADD37:%.*]] = add nsw i64 [[TMP42]], 1 +// CHECK11-NEXT: store i64 [[ADD37]], i64* [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK11-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP44]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -1637,18 +1765,21 @@ // CHECK11-SAME: ([10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x [2 x i32]]*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [10 x [2 x i32]]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [10 x [2 x i32]]* [[TMP0]], [10 x [2 x i32]]** [[TMP1]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1656,64 +1787,79 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 19, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x [2 x i32]]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x [2 x i32]]* [[TMP0]]) +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [10 x [2 x i32]]* [[TMP2]], [10 x [2 x i32]]** [[TMP16]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1725,71 +1871,77 @@ // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 19, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 2 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP18]], 2 // CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL4]] // CHECK11-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK11-NEXT: store i32 [[ADD6]], i32* [[J]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP14]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP6]], i32 0, i32 [[TMP19]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[J]], align 4 +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP20]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX7]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp @@ -227,83 +227,101 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP18]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -313,64 +331,70 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -378,83 +402,101 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP18]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -464,64 +506,70 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -529,103 +577,121 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 123 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i64 [[TMP12]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP14]], i64* [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP16]], i64* [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP17]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], 122 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 122 // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK1: cond.true5: // CHECK1-NEXT: br label [[COND_END7:%.*]] // CHECK1: cond.false6: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END7]] // CHECK1: cond.end7: -// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP18]], [[COND_FALSE6]] ] +// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK1-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -635,64 +701,70 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -796,81 +868,99 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -880,61 +970,67 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -942,81 +1038,99 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1026,61 +1140,67 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1088,101 +1208,119 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 123 // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP15]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], 122 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 122 // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK3: cond.true5: // CHECK3-NEXT: br label [[COND_END7:%.*]] // CHECK3: cond.false6: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END7]] // CHECK3: cond.end7: -// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP16]], [[COND_FALSE6]] ] +// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK3-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1192,61 +1330,67 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1495,29 +1639,31 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1527,95 +1673,110 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP25]], i64* [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP27]], i64* [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32* [[TMP6]], i32** [[TMP31]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1625,87 +1786,94 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 +// CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK9-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] +// CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) +// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1717,29 +1885,31 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1749,95 +1919,110 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP25]], i64* [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP27]], i64* [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32* [[TMP6]], i32** [[TMP31]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1847,87 +2032,94 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 +// CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK9-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] +// CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) +// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1940,8 +2132,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 @@ -1950,250 +2141,273 @@ // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK9-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK9-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] +// CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]]) +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK9-NEXT: store i64 [[TMP24]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP28]], i64* [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP30]], i64* [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP32]], i32* [[TMP31]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[TMP33]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store i32* [[TMP6]], i32** [[TMP34]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP36]], i32* [[TMP35]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK9-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] -// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE14:%.*]], label [[COND_FALSE15:%.*]] -// CHECK9: cond.true14: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: br label [[COND_END16:%.*]] -// CHECK9: cond.false15: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: br label [[COND_END16]] -// CHECK9: cond.end16: -// CHECK9-NEXT: [[COND17:%.*]] = phi i32 [ [[TMP32]], [[COND_TRUE14]] ], [ [[TMP33]], [[COND_FALSE15]] ] -// CHECK9-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP34]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] +// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP43]], [[TMP44]] +// CHECK9-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] +// CHECK9: cond.true11: +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: br label [[COND_END13:%.*]] +// CHECK9: cond.false12: +// CHECK9-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: br label [[COND_END13]] +// CHECK9: cond.end13: +// CHECK9-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP45]], [[COND_TRUE11]] ], [ [[TMP46]], [[COND_FALSE12]] ] +// CHECK9-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP47]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK9-NEXT: [[TMP48:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP49:%.*]] = load i32, i32* [[TMP48]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP49]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I7:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 8 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK9-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK9-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK9-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 +// CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I7]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[I7]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -2296,83 +2510,101 @@ // CHECK9-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2382,63 +2614,69 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK9-NEXT: ret void // // @@ -2446,83 +2684,101 @@ // CHECK9-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2532,63 +2788,69 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK9-NEXT: ret void // // @@ -2597,120 +2859,135 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK9-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP5]]) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP11]], 10 // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK9-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP19]], i64* [[TMP18]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP20]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.12*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP20]], 9 -// CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] -// CHECK9: cond.true6: -// CHECK9-NEXT: br label [[COND_END8:%.*]] -// CHECK9: cond.false7: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: br label [[COND_END8]] -// CHECK9: cond.end8: -// CHECK9-NEXT: [[COND9:%.*]] = phi i32 [ 9, [[COND_TRUE6]] ], [ [[TMP21]], [[COND_FALSE7]] ] -// CHECK9-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP29]], 9 +// CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK9: cond.true5: +// CHECK9-NEXT: br label [[COND_END7:%.*]] +// CHECK9: cond.false6: +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: br label [[COND_END7]] +// CHECK9: cond.end7: +// CHECK9-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP30]], [[COND_FALSE6]] ] +// CHECK9-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2720,65 +2997,72 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK9-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) // CHECK9-NEXT: ret void // // @@ -3025,27 +3309,30 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3055,91 +3342,108 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32* [[TMP6]], i32** [[TMP29]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3152,80 +3456,88 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP27]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) +// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -3237,27 +3549,30 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3267,91 +3582,108 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32* [[TMP6]], i32** [[TMP29]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3364,80 +3696,88 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP27]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) +// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -3450,33 +3790,35 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -3486,122 +3828,140 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP18]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]]) +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP24]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], i32* [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP28]], i32* [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP30]], i32* [[TMP29]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[TMP31]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32* [[TMP6]], i32** [[TMP32]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP34]], i32* [[TMP33]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP28]], [[TMP29]] +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK11-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK11: cond.true11: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END13:%.*]] // CHECK11: cond.false12: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END13]] // CHECK11: cond.end13: -// CHECK11-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP30]], [[COND_TRUE11]] ], [ [[TMP31]], [[COND_FALSE12]] ] +// CHECK11-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE11]] ], [ [[TMP44]], [[COND_FALSE12]] ] // CHECK11-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP32]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP45]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK11-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -3614,81 +3974,91 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP29]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -3790,81 +4160,99 @@ // CHECK11-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3874,60 +4262,66 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP17]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK11-NEXT: ret void // // @@ -3935,81 +4329,99 @@ // CHECK11-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4019,60 +4431,66 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP17]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK11-NEXT: ret void // // @@ -4081,114 +4499,132 @@ // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP5]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP11]], 10 // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[TMP16]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[TMP19]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP27]], 9 // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK11: cond.true5: // CHECK11-NEXT: br label [[COND_END7:%.*]] // CHECK11: cond.false6: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END7]] // CHECK11: cond.end7: -// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ] +// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP28]], [[COND_FALSE6]] ] // CHECK11-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP29]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4198,61 +4634,69 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP19]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp @@ -394,8 +394,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 @@ -406,142 +405,161 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i64 [[TMP4]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP2]], i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S]* [[TMP6]] to %struct.S* +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP8]], %struct.St* noundef [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], [2 x i32]* [[VEC2]], i64 [[TMP19]], [2 x %struct.S]* [[S_ARR3]], %struct.S* [[VAR5]], i64 [[TMP21]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP27]], i64* [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP29]], i64* [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP32]], i32* [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP36]], i32* [[TMP35]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i64 2 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP40]]) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN4]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP26]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP41]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done11: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done5: // CHECK1-NEXT: ret void // // @@ -579,144 +597,150 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP8:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 8 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP16]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP17]], i8* align 4 [[TMP18]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP19:%.*]] = bitcast [2 x %struct.S]* [[TMP10]] to %struct.S* +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP20]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP19]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done6: -// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP8]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR7]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* noundef [[AGG_TMP8]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP8]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP20]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP12]], %struct.St* noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP22]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP23]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK1-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX11]] to i8* -// CHECK1-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR7]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP29]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[ARRAYIDX6]] to i8* +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP34]] +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN14]], i64 2 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN9]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP27]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP39]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done15: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done10: // CHECK1-NEXT: ret void // // @@ -724,33 +748,33 @@ // CHECK1-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.2*, align 8 // CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x i8*], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x i8*], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x i8*], align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) -// CHECK1-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP1]], %struct.S.0** [[TMP]], align 8 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) +// CHECK1-NEXT: store %struct.S.2* [[TEST]], %struct.S.2** [[VAR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S.2*, %struct.S.2** [[VAR]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[TMP1]], %struct.S.2** [[TMP]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_VAR]], align 4 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP2]], i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8** [[TMP5]] to [2 x i32]** // CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP6]], align 8 @@ -768,19 +792,19 @@ // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 // CHECK1-NEXT: store i8* null, i8** [[TMP14]], align 8 // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to [2 x %struct.S.0]** -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to [2 x %struct.S.2]** +// CHECK1-NEXT: store [2 x %struct.S.2]* [[S_ARR]], [2 x %struct.S.2]** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.0]** -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.2]** +// CHECK1-NEXT: store [2 x %struct.S.2]* [[S_ARR]], [2 x %struct.S.2]** [[TMP18]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 // CHECK1-NEXT: store i8* null, i8** [[TMP19]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to %struct.S.0** -// CHECK1-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to %struct.S.2** +// CHECK1-NEXT: store %struct.S.2* [[TMP4]], %struct.S.2** [[TMP21]], align 8 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.0** -// CHECK1-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.2** +// CHECK1-NEXT: store %struct.S.2* [[TMP4]], %struct.S.2** [[TMP23]], align 8 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 // CHECK1-NEXT: store i8* null, i8** [[TMP24]], align 8 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 @@ -790,21 +814,21 @@ // CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 // CHECK1-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81([2 x i32]* [[VEC]], i64 [[TMP3]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP4]]) #[[ATTR2]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81([2 x i32]* [[VEC]], i64 [[TMP3]], [2 x %struct.S.2]* [[S_ARR]], %struct.S.2* [[TMP4]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP29]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP29]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done2: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK1-NEXT: ret i32 [[TMP30]] // @@ -852,365 +876,391 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) +// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81 -// CHECK1-SAME: ([2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: ([2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.2]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.2]*, align 8 +// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store [2 x %struct.S.2]* [[S_ARR]], [2 x %struct.S.2]** [[S_ARR_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[VAR]], %struct.S.2** [[VAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i64 [[TMP4]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP5]]) +// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.2]*, [2 x %struct.S.2]** [[S_ARR_ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.2*, %struct.S.2** [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[TMP2]], %struct.S.2** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x %struct.S.2]* [[TMP1]], [2 x %struct.S.2]** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[TMP8]], %struct.S.2** [[TMP7]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca %struct.S.2*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.2]*, [2 x %struct.S.2]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP7]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[TMP8]], %struct.S.2** [[TMP]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP9]], i8* align 4 [[TMP10]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [2 x %struct.S.2]* [[TMP6]] to %struct.S.2* +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.2* [[ARRAY_BEGIN]], [[TMP12]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP11]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.2* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.St* noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.2* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: [[TMP13:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[TMP13]], %struct.St* noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: store %struct.S.2* [[VAR]], %struct.S.2** [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], [2 x i32]* [[VEC2]], i64 [[TMP20]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP21]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK1-NEXT: store i64 [[TMP22]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK1-NEXT: store i64 [[TMP24]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP26]], i64* [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP28]], i64* [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP31]], i32* [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store [2 x %struct.S.2]* [[S_ARR]], [2 x %struct.S.2]** [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP34:%.*]] = load %struct.S.2*, %struct.S.2** [[_TMP4]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[TMP34]], %struct.S.2** [[TMP33]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN10]], i64 2 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN6]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP26]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done11: +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP39]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done7: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1ERKS0_2St -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC2ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP0]], %struct.St* noundef [[T]]) +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[S]], %struct.S.2** [[S_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S.2*, %struct.S.2** [[S_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC2ERKS0_2St(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[TMP0]], %struct.St* noundef [[T]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP8:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP9:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP4:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP5:%.*]] = alloca %struct.S.2*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load [2 x %struct.S.2]*, [2 x %struct.S.2]** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP11]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[TMP12]], %struct.S.2** [[TMP]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP15]], i8* align 4 [[TMP16]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast [2 x %struct.S.2]* [[TMP10]] to %struct.S.2* +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.2* [[ARRAY_BEGIN]], [[TMP18]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP17]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.2* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done6: -// CHECK1-NEXT: [[TMP9:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP8]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR7]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP9]], %struct.St* noundef [[AGG_TMP8]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP8]]) #[[ATTR2]] -// CHECK1-NEXT: store %struct.S.0* [[VAR7]], %struct.S.0** [[_TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.2* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP18]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done3: +// CHECK1-NEXT: [[TMP19:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[TMP19]], %struct.St* noundef [[AGG_TMP4]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) #[[ATTR2]] +// CHECK1-NEXT: store %struct.S.2* [[VAR]], %struct.S.2** [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP21]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP22]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP9]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i64 0, i64 [[IDXPROM11]] -// CHECK1-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX12]] to i8* -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP28]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load %struct.S.2*, %struct.S.2** [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i64 0, i64 [[IDXPROM7]] +// CHECK1-NEXT: [[TMP32:%.*]] = bitcast %struct.S.2* [[ARRAYIDX8]] to i8* +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast %struct.S.2* [[TMP30]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP34]], 1 +// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN14]], i64 2 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN10]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP27]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done15: +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP37]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done11: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], %struct.S.2* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load volatile i32, i32* @g, align 4 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], %struct.S.2* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load volatile i32, i32* @g, align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1219,16 +1269,16 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2ERKS0_2St -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 -// CHECK1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[S]], %struct.S.2** [[S_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], %struct.S.2* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S.2*, %struct.S.2** [[S_ADDR]], align 8 +// CHECK1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[TMP0]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[F2]], align 4 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.St* [[T]], i32 0, i32 0 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 @@ -1238,11 +1288,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -1454,8 +1504,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 @@ -1464,134 +1513,159 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32 [[TMP4]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP2]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC1]] to i8* -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK3-NEXT: [[TMP12:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S]* [[TMP6]] to %struct.S* +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP8]], %struct.St* noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP18]], i32* [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], [2 x i32]* [[VEC1]], i32 [[TMP17]], [2 x %struct.S]* [[S_ARR2]], %struct.S* [[VAR4]], i32 [[TMP19]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP30]], i32* [[TMP29]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store %struct.S* [[VAR]], %struct.S** [[TMP32]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP34]], i32* [[TMP33]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i32 2 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN4]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP24]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP39]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done8: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done5: // CHECK3-NEXT: ret void // // @@ -1629,138 +1703,146 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC1]] to i8* -// CHECK3-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: [[TMP17:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK3-NEXT: [[TMP18:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP17]], i8* align 4 [[TMP18]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP19:%.*]] = bitcast [2 x %struct.S]* [[TMP10]] to %struct.S* +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP20]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP19]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP20]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP12]], %struct.St* noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP22]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP23]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC1]], i32 0, i32 [[TMP18]] -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 [[TMP19]] -// CHECK3-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* -// CHECK3-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR4]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP30]] +// CHECK3-NEXT: store i32 [[TMP29]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP31]] +// CHECK3-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* +// CHECK3-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP35]], [[TMP34]] +// CHECK3-NEXT: store i32 [[ADD5]], i32* [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP36]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i32 2 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP27]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP39]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done11: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done8: // CHECK3-NEXT: ret void // // @@ -1768,32 +1850,32 @@ // CHECK3-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x i8*], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x i8*], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x i8*], align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK3-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i32 8, i1 false) -// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i32 1 -// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) -// CHECK3-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[TMP1]], %struct.S.0** [[TMP]], align 4 +// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i32 1 +// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK3-NEXT: store %struct.S.1* [[TEST]], %struct.S.1** [[VAR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[TMP1]], %struct.S.1** [[TMP]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_VAR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[T_VAR_CASTED]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8** [[TMP5]] to [2 x i32]** // CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP6]], align 4 @@ -1811,19 +1893,19 @@ // CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 // CHECK3-NEXT: store i8* null, i8** [[TMP14]], align 4 // CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to [2 x %struct.S.0]** -// CHECK3-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to [2 x %struct.S.1]** +// CHECK3-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP16]], align 4 // CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.0]** -// CHECK3-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP18]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.1]** +// CHECK3-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP18]], align 4 // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 // CHECK3-NEXT: store i8* null, i8** [[TMP19]], align 4 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to %struct.S.0** -// CHECK3-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to %struct.S.1** +// CHECK3-NEXT: store %struct.S.1* [[TMP4]], %struct.S.1** [[TMP21]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.0** -// CHECK3-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.1** +// CHECK3-NEXT: store %struct.S.1* [[TMP4]], %struct.S.1** [[TMP23]], align 4 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 // CHECK3-NEXT: store i8* null, i8** [[TMP24]], align 4 // CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 @@ -1833,21 +1915,21 @@ // CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 // CHECK3-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81([2 x i32]* [[VEC]], i32 [[TMP3]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP4]]) #[[ATTR2]] +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81([2 x i32]* [[VEC]], i32 [[TMP3]], [2 x %struct.S.1]* [[S_ARR]], %struct.S.1* [[TMP4]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP29]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP29]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK3: arraydestroy.done2: -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK3-NEXT: ret i32 [[TMP30]] // @@ -1895,354 +1977,384 @@ // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK3-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81 -// CHECK3-SAME: ([2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: ([2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.1]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.1]*, align 4 +// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[S_ARR_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[VAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32 [[TMP4]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP5]]) +// CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[S_ARR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[TMP2]], %struct.S.1** [[TMP]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [2 x %struct.S.1]* [[TMP1]], [2 x %struct.S.1]** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[TMP8]], %struct.S.1** [[TMP7]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP7]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[TMP8]], %struct.S.1** [[TMP]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: [[TMP9:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK3-NEXT: [[TMP10:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 [[TMP10]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = bitcast [2 x %struct.S.1]* [[TMP6]] to %struct.S.1* +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN]], [[TMP12]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP11]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.St* noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: [[TMP13:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP13]], %struct.St* noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], [2 x i32]* [[VEC2]], i32 [[TMP18]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP19]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP24]], i32* [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], i32* [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP30]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP32:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP4]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[TMP32]], %struct.S.1** [[TMP31]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN9]], i32 2 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP24]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done10: +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP37]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done7: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC1ERKS0_2St -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 4 -// CHECK3-NEXT: call void @_ZN1SIiEC2ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP0]], %struct.St* noundef [[T]]) +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[S]], %struct.S.1** [[S_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.S.1*, %struct.S.1** [[S_ADDR]], align 4 +// CHECK3-NEXT: call void @_ZN1SIiEC2ERKS0_2St(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP0]], %struct.St* noundef [[T]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP11]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[TMP12]], %struct.S.1** [[TMP]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK3-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK3-NEXT: [[TMP16:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP15]], i8* align 4 [[TMP16]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = bitcast [2 x %struct.S.1]* [[TMP10]] to %struct.S.1* +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN]], [[TMP18]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP17]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: [[TMP9:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP9]], %struct.St* noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP18]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: [[TMP19:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP19]], %struct.St* noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP21]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP22]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP19]] -// CHECK3-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX9]] to i8* -// CHECK3-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP29]] +// CHECK3-NEXT: store i32 [[TMP28]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 [[TMP31]] +// CHECK3-NEXT: [[TMP32:%.*]] = bitcast %struct.S.1* [[ARRAYIDX6]] to i8* +// CHECK3-NEXT: [[TMP33:%.*]] = bitcast %struct.S.1* [[TMP30]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i32 4, i1 false) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], 1 +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN11]], i32 2 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN8]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP27]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done12: +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP37]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done9: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load volatile i32, i32* @g, align 4 // CHECK3-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load volatile i32, i32* @g, align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -2251,16 +2363,16 @@ // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC2ERKS0_2St -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 4 -// CHECK3-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[S]], %struct.S.1** [[S_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.S.1*, %struct.S.1** [[S_ADDR]], align 4 +// CHECK3-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[TMP0]], i32 0, i32 0 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[F2]], align 4 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.St* [[T]], i32 0, i32 0 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 @@ -2270,11 +2382,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -2424,9 +2536,7 @@ // CHECK5-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK5-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 // CHECK5-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 // CHECK5-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 @@ -2434,201 +2544,217 @@ // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK5-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load volatile i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK5-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* @g, align 4 +// CHECK5-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 -// CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]]) +// CHECK5-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK5-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[G_ADDR]] to i32* -// CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* -// CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK5-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 +// CHECK5-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[G]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: store i32 [[TMP4]], i32* [[G1]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK5-NEXT: store i32 [[TMP6]], i32* [[SIVAR]], align 4 +// CHECK5-NEXT: store i32* [[G1]], i32** [[TMP]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] -// CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[G_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK5-NEXT: [[TMP14:%.*]] = load volatile i32, i32* [[TMP13]], align 4 -// CHECK5-NEXT: [[CONV6:%.*]] = bitcast i64* [[G1_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 4 -// CHECK5-NEXT: [[CONV7:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4 -// CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]) +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK5-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK5-NEXT: store i64 [[TMP17]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP19]], i64* [[TMP18]], align 8 +// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: store i64 [[TMP21]], i64* [[TMP20]], align 8 +// CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[G]], align 4 +// CHECK5-NEXT: store i32 [[TMP23]], i32* [[TMP22]], align 8 +// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* @g, align 4 +// CHECK5-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK5-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[G_ADDR]] to i32* -// CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* -// CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK5-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 +// CHECK5-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK5-NEXT: store i32 [[TMP6]], i32* [[G]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: store i32 [[TMP8]], i32* [[G1]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 8 +// CHECK5-NEXT: store i32 [[TMP10]], i32* [[SIVAR]], align 4 +// CHECK5-NEXT: store i32* [[G1]], i32** [[TMP]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP11]] to i32 +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 +// CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: store i32 1, i32* [[CONV]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK5-NEXT: store volatile i32 1, i32* [[TMP10]], align 4 -// CHECK5-NEXT: store i32 2, i32* [[CONV2]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK5-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8 -// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK5-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8 -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK5-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8 -// CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) +// CHECK5-NEXT: store i32 1, i32* [[G]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK5-NEXT: store volatile i32 1, i32* [[TMP21]], align 4 +// CHECK5-NEXT: store i32 2, i32* [[SIVAR]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK5-NEXT: store i32* [[G]], i32** [[TMP22]], align 8 +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK5-NEXT: store i32* [[TMP24]], i32** [[TMP23]], align 8 +// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 +// CHECK5-NEXT: store i32* [[SIVAR]], i32** [[TMP25]], align 8 +// CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK5-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]]) // CHECK5-NEXT: ret void // // @@ -2656,8 +2782,7 @@ // CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 // CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 // CHECK13-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK13-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK13-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK13-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 @@ -2668,142 +2793,161 @@ // CHECK13-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK13-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK13-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i64 [[TMP4]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP2]], i64 [[TMP6]]) +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP6]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK13-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK13-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK13-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK13-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK13-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK13-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK13-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK13-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* +// CHECK13-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK13-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 8 +// CHECK13-NEXT: store i32 [[TMP10]], i32* [[SIVAR]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK13-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i64 8, i1 false) -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 -// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK13-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK13-NEXT: [[TMP12:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 8, i1 false) +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S]* [[TMP6]] to %struct.S* +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK13: omp.arraycpy.body: -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4:[0-9]+]] // CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5:[0-9]+]] // CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK13: omp.arraycpy.done4: -// CHECK13-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* noundef [[AGG_TMP6]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR5]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK13: omp.arraycpy.done1: +// CHECK13-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP8]], %struct.St* noundef [[AGG_TMP2]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR5]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK13-NEXT: [[CONV9:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], [2 x i32]* [[VEC2]], i64 [[TMP19]], [2 x %struct.S]* [[S_ARR3]], %struct.S* [[VAR5]], i64 [[TMP21]]) +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK13-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP27]], i64* [[TMP26]], align 8 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP29]], i64* [[TMP28]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP30]], align 8 +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK13-NEXT: store i32 [[TMP32]], i32* [[TMP31]], align 8 +// CHECK13-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP33]], align 8 +// CHECK13-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK13-NEXT: store %struct.S* [[VAR]], %struct.S** [[TMP34]], align 8 +// CHECK13-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK13-NEXT: store i32 [[TMP36]], i32* [[TMP35]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK13-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK13-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i64 2 +// CHECK13-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP40]]) +// CHECK13-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN4]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP26]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP41]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK13: arraydestroy.done11: +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK13: arraydestroy.done5: // CHECK13-NEXT: ret void // // @@ -2841,144 +2985,150 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK13-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK13-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK13-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK13-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK13-NEXT: [[AGG_TMP8:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK13-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK13-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK13-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* +// CHECK13-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP10:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK13-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[TMP11]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 7 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 8 +// CHECK13-NEXT: store i32 [[TMP14]], i32* [[SIVAR]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK13-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP16]] to i32 +// CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK13-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i64 8, i1 false) -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK13-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 -// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK13-NEXT: [[TMP17:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK13-NEXT: [[TMP18:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP17]], i8* align 4 [[TMP18]], i64 8, i1 false) +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP19:%.*]] = bitcast [2 x %struct.S]* [[TMP10]] to %struct.S* +// CHECK13-NEXT: [[TMP20:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP20]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK13: omp.arraycpy.body: -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP19]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] // CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK13: omp.arraycpy.done6: -// CHECK13-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP8]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR7]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* noundef [[AGG_TMP8]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP8]]) #[[ATTR5]] -// CHECK13-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP20]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK13: omp.arraycpy.done2: +// CHECK13-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP12]], %struct.St* noundef [[AGG_TMP3]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR5]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP22]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP23]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK13-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK13-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK13-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX11]] to i8* -// CHECK13-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR7]] to i8* -// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK13-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK13-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 4 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: store i32 [[TMP29]], i32* [[ARRAYIDX]], align 4 +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, i32* [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK13-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK13-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[ARRAYIDX6]] to i8* +// CHECK13-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i64 4, i1 false) +// CHECK13-NEXT: [[TMP34:%.*]] = load i32, i32* [[I]], align 4 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP34]] +// CHECK13-NEXT: store i32 [[ADD7]], i32* [[SIVAR]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK13-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], 1 +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK13-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN14]], i64 2 +// CHECK13-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) +// CHECK13-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN9]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP27]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP39]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK13: arraydestroy.done15: +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK13: arraydestroy.done10: // CHECK13-NEXT: ret void // // @@ -2993,318 +3143,344 @@ // // // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81 -// CHECK13-SAME: ([2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: ([2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.2]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 // CHECK13-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK13-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 -// CHECK13-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.2]*, align 8 +// CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK13-NEXT: [[TMP:%.*]] = alloca %struct.S.2*, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK13-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK13-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK13-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 +// CHECK13-NEXT: store [2 x %struct.S.2]* [[S_ARR]], [2 x %struct.S.2]** [[S_ARR_ADDR]], align 8 +// CHECK13-NEXT: store %struct.S.2* [[VAR]], %struct.S.2** [[VAR_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK13-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK13-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i64 [[TMP4]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP5]]) +// CHECK13-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.2]*, [2 x %struct.S.2]** [[S_ARR_ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load %struct.S.2*, %struct.S.2** [[VAR_ADDR]], align 8 +// CHECK13-NEXT: store %struct.S.2* [[TMP2]], %struct.S.2** [[TMP]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store [2 x %struct.S.2]* [[TMP1]], [2 x %struct.S.2]** [[TMP6]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP]], align 8 +// CHECK13-NEXT: store %struct.S.2* [[TMP8]], %struct.S.2** [[TMP7]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK13-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK13-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[TMP:%.*]] = alloca %struct.S.2*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK13-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK13-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK13-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK13-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 8 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK13-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK13-NEXT: [[_TMP4:%.*]] = alloca %struct.S.2*, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK13-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK13-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK13-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 +// CHECK13-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK13-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.2]*, [2 x %struct.S.2]** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP7]], align 8 +// CHECK13-NEXT: store %struct.S.2* [[TMP8]], %struct.S.2** [[TMP]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK13-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i64 8, i1 false) -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 -// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK13-NEXT: [[TMP9:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK13-NEXT: [[TMP10:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP9]], i8* align 4 [[TMP10]], i64 8, i1 false) +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP11:%.*]] = bitcast [2 x %struct.S.2]* [[TMP6]] to %struct.S.2* +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.2* [[ARRAY_BEGIN]], [[TMP12]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK13: omp.arraycpy.body: -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK13-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP11]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK13-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.2* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] -// CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK13: omp.arraycpy.done4: -// CHECK13-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK13-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.St* noundef [[AGG_TMP6]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR5]] -// CHECK13-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP7]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.2* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK13: omp.arraycpy.done2: +// CHECK13-NEXT: [[TMP13:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP]], align 8 +// CHECK13-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[TMP13]], %struct.St* noundef [[AGG_TMP3]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR5]] +// CHECK13-NEXT: store %struct.S.2* [[VAR]], %struct.S.2** [[_TMP4]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], [2 x i32]* [[VEC2]], i64 [[TMP20]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP21]]) +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK13-NEXT: store i64 [[TMP22]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK13-NEXT: store i64 [[TMP24]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP26]], i64* [[TMP25]], align 8 +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP28]], i64* [[TMP27]], align 8 +// CHECK13-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP29]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK13-NEXT: store i32 [[TMP31]], i32* [[TMP30]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store [2 x %struct.S.2]* [[S_ARR]], [2 x %struct.S.2]** [[TMP32]], align 8 +// CHECK13-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP34:%.*]] = load %struct.S.2*, %struct.S.2** [[_TMP4]], align 8 +// CHECK13-NEXT: store %struct.S.2* [[TMP34]], %struct.S.2** [[TMP33]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK13-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN10]], i64 2 +// CHECK13-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) +// CHECK13-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN6]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP26]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK13-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK13: arraydestroy.done11: +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP39]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK13-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK13: arraydestroy.done7: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@_ZN1SIiEC1ERKS0_2St -// CHECK13-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 { +// CHECK13-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 { // CHECK13-NEXT: entry: -// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK13-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK13-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK13-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 -// CHECK13-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 -// CHECK13-NEXT: call void @_ZN1SIiEC2ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP0]], %struct.St* noundef [[T]]) #[[ATTR4]] +// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK13-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK13-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK13-NEXT: store %struct.S.2* [[S]], %struct.S.2** [[S_ADDR]], align 8 +// CHECK13-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.S.2*, %struct.S.2** [[S_ADDR]], align 8 +// CHECK13-NEXT: call void @_ZN1SIiEC2ERKS0_2St(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[TMP0]], %struct.St* noundef [[T]]) #[[ATTR4]] // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK13-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK13-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[TMP:%.*]] = alloca %struct.S.2*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK13-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK13-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK13-NEXT: [[AGG_TMP8:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK13-NEXT: [[_TMP9:%.*]] = alloca %struct.S.0*, align 8 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK13-NEXT: [[AGG_TMP4:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK13-NEXT: [[_TMP5:%.*]] = alloca %struct.S.2*, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK13-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK13-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK13-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 +// CHECK13-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load [2 x %struct.S.2]*, [2 x %struct.S.2]** [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP12:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP11]], align 8 +// CHECK13-NEXT: store %struct.S.2* [[TMP12]], %struct.S.2** [[TMP]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK13-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK13-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i64 8, i1 false) -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK13-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 -// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK13-NEXT: [[TMP15:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK13-NEXT: [[TMP16:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP15]], i8* align 4 [[TMP16]], i64 8, i1 false) +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP17:%.*]] = bitcast [2 x %struct.S.2]* [[TMP10]] to %struct.S.2* +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.2* [[ARRAY_BEGIN]], [[TMP18]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK13: omp.arraycpy.body: -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK13-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP17]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK13-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.2* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] -// CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK13: omp.arraycpy.done6: -// CHECK13-NEXT: [[TMP9:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK13-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP8]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR7]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP9]], %struct.St* noundef [[AGG_TMP8]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP8]]) #[[ATTR5]] -// CHECK13-NEXT: store %struct.S.0* [[VAR7]], %struct.S.0** [[_TMP9]], align 8 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.2* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP18]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK13: omp.arraycpy.done3: +// CHECK13-NEXT: [[TMP19:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP]], align 8 +// CHECK13-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[TMP19]], %struct.St* noundef [[AGG_TMP4]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) #[[ATTR5]] +// CHECK13-NEXT: store %struct.S.2* [[VAR]], %struct.S.2** [[_TMP5]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP21]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP22]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK13-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP9]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK13-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i64 0, i64 [[IDXPROM11]] -// CHECK13-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX12]] to i8* -// CHECK13-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false) +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: store i32 [[TMP28]], i32* [[ARRAYIDX]], align 4 +// CHECK13-NEXT: [[TMP30:%.*]] = load %struct.S.2*, %struct.S.2** [[_TMP5]], align 8 +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, i32* [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK13-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i64 0, i64 [[IDXPROM7]] +// CHECK13-NEXT: [[TMP32:%.*]] = bitcast %struct.S.2* [[ARRAYIDX8]] to i8* +// CHECK13-NEXT: [[TMP33:%.*]] = bitcast %struct.S.2* [[TMP30]] to i8* +// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i64 4, i1 false) // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK13-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP34]], 1 +// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK13-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN14]], i64 2 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK13-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN10]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP27]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK13-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK13: arraydestroy.done15: +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP37]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK13-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK13: arraydestroy.done11: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK13-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 { +// CHECK13-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 { // CHECK13-NEXT: entry: -// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK13-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK13-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK13-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR5]] +// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK13-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK13-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK13-NEXT: call void @_ZN1SIiED2Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR5]] // CHECK13-NEXT: ret void // // @@ -3360,25 +3536,25 @@ // // // CHECK13-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK13-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 { +// CHECK13-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 { // CHECK13-NEXT: entry: -// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK13-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK13-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK13-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK13-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@_ZN1SIiEC2ERKS0_2St -// CHECK13-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 { +// CHECK13-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 { // CHECK13-NEXT: entry: -// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK13-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK13-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK13-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 -// CHECK13-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK13-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 -// CHECK13-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK13-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK13-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK13-NEXT: store %struct.S.2* [[S]], %struct.S.2** [[S_ADDR]], align 8 +// CHECK13-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK13-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], %struct.S.2* [[THIS1]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.S.2*, %struct.S.2** [[S_ADDR]], align 8 +// CHECK13-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[TMP0]], i32 0, i32 0 // CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[F2]], align 4 // CHECK13-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.St* [[T]], i32 0, i32 0 // CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 @@ -3395,8 +3571,7 @@ // CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 // CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 // CHECK15-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK15-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK15-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK15-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 @@ -3405,134 +3580,159 @@ // CHECK15-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 // CHECK15-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], i32* [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[SIVAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32 [[TMP4]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP2]], i32 [[TMP6]]) +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP6]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK15-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK15-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK15-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK15-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK15-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK15-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[SIVAR]], i32* [[SIVAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], i32* [[SIVAR]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC1]] to i8* -// CHECK15-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i32 8, i1 false) -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 -// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK15-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK15-NEXT: [[TMP12:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 8, i1 false) +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S]* [[TMP6]] to %struct.S* +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK15: omp.arraycpy.body: -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4:[0-9]+]] // CHECK15-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5:[0-9]+]] // CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK15: omp.arraycpy.done3: -// CHECK15-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* noundef [[AGG_TMP5]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR5]] -// CHECK15-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK15: omp.arraycpy.done1: +// CHECK15-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP8]], %struct.St* noundef [[AGG_TMP2]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR5]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK15-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP16]], i32* [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP18]], i32* [[SIVAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], [2 x i32]* [[VEC1]], i32 [[TMP17]], [2 x %struct.S]* [[S_ARR2]], %struct.S* [[VAR4]], i32 [[TMP19]]) +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP28]], align 4 +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK15-NEXT: store i32 [[TMP30]], i32* [[TMP29]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP31]], align 4 +// CHECK15-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK15-NEXT: store %struct.S* [[VAR]], %struct.S** [[TMP32]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK15-NEXT: store i32 [[TMP34]], i32* [[TMP33]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK15-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK15-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i32 2 +// CHECK15-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) +// CHECK15-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN4]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP24]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP39]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK15: arraydestroy.done8: +// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK15: arraydestroy.done5: // CHECK15-NEXT: ret void // // @@ -3570,138 +3770,146 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK15-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK15-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK15-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK15-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK15-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK15-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[SIVAR]], i32* [[SIVAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[TMP11]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], i32* [[SIVAR]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC1]] to i8* -// CHECK15-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i32 8, i1 false) -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK15-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 -// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK15-NEXT: [[TMP17:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK15-NEXT: [[TMP18:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP17]], i8* align 4 [[TMP18]], i32 8, i1 false) +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP19:%.*]] = bitcast [2 x %struct.S]* [[TMP10]] to %struct.S* +// CHECK15-NEXT: [[TMP20:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP20]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK15: omp.arraycpy.body: -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP19]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] // CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK15: omp.arraycpy.done3: -// CHECK15-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* noundef [[AGG_TMP5]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR5]] -// CHECK15-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP20]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK15: omp.arraycpy.done1: +// CHECK15-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP12]], %struct.St* noundef [[AGG_TMP2]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR5]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP22]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP23]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK15-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC1]], i32 0, i32 [[TMP18]] -// CHECK15-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 [[TMP19]] -// CHECK15-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* -// CHECK15-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR4]] to i8* -// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false) -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 -// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK15-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP30]] +// CHECK15-NEXT: store i32 [[TMP29]], i32* [[ARRAYIDX]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP31]] +// CHECK15-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* +// CHECK15-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i32 4, i1 false) +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: [[TMP35:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP35]], [[TMP34]] +// CHECK15-NEXT: store i32 [[ADD5]], i32* [[SIVAR]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK15-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP36]], 1 +// CHECK15-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK15-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i32 2 +// CHECK15-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) +// CHECK15-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP27]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP39]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK15: arraydestroy.done11: +// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK15: arraydestroy.done8: // CHECK15-NEXT: ret void // // @@ -3716,307 +3924,337 @@ // // // CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81 -// CHECK15-SAME: ([2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: ([2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.1]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 // CHECK15-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK15-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 -// CHECK15-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.1]*, align 4 +// CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK15-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK15-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK15-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 +// CHECK15-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[S_ARR_ADDR]], align 4 +// CHECK15-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[VAR_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK15-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], i32* [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32 [[TMP4]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP5]]) +// CHECK15-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[S_ARR_ADDR]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR_ADDR]], align 4 +// CHECK15-NEXT: store %struct.S.1* [[TMP2]], %struct.S.1** [[TMP]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store [2 x %struct.S.1]* [[TMP1]], [2 x %struct.S.1]** [[TMP6]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK15-NEXT: store %struct.S.1* [[TMP8]], %struct.S.1** [[TMP7]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK15-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK15-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK15-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK15-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK15-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK15-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK15-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK15-NEXT: [[_TMP4:%.*]] = alloca %struct.S.1*, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK15-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 +// CHECK15-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP7]], align 4 +// CHECK15-NEXT: store %struct.S.1* [[TMP8]], %struct.S.1** [[TMP]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK15-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i32 8, i1 false) -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 -// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK15-NEXT: [[TMP9:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK15-NEXT: [[TMP10:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 [[TMP10]], i32 8, i1 false) +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP11:%.*]] = bitcast [2 x %struct.S.1]* [[TMP6]] to %struct.S.1* +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN]], [[TMP12]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK15: omp.arraycpy.body: -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK15-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP11]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK15-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] -// CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK15: omp.arraycpy.done4: -// CHECK15-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK15-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.St* noundef [[AGG_TMP6]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR5]] -// CHECK15-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP7]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK15: omp.arraycpy.done2: +// CHECK15-NEXT: [[TMP13:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK15-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP13]], %struct.St* noundef [[AGG_TMP3]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR5]] +// CHECK15-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP4]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK15-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP17]], i32* [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], [2 x i32]* [[VEC2]], i32 [[TMP18]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP19]]) +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP24]], i32* [[TMP23]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP26]], i32* [[TMP25]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP27]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK15-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP30]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP32:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP4]], align 4 +// CHECK15-NEXT: store %struct.S.1* [[TMP32]], %struct.S.1** [[TMP31]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK15-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN9]], i32 2 +// CHECK15-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK15-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN6]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP24]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK15-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] -// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK15: arraydestroy.done10: +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP37]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK15-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK15: arraydestroy.done7: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@_ZN1SIiEC1ERKS0_2St -// CHECK15-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 { +// CHECK15-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 { // CHECK15-NEXT: entry: -// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK15-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK15-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK15-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 4 -// CHECK15-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 4 -// CHECK15-NEXT: call void @_ZN1SIiEC2ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP0]], %struct.St* noundef [[T]]) #[[ATTR4]] +// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK15-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK15-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK15-NEXT: store %struct.S.1* [[S]], %struct.S.1** [[S_ADDR]], align 4 +// CHECK15-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.S.1*, %struct.S.1** [[S_ADDR]], align 4 +// CHECK15-NEXT: call void @_ZN1SIiEC2ERKS0_2St(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP0]], %struct.St* noundef [[T]]) #[[ATTR4]] // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK15-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK15-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK15-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK15-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK15-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK15-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK15-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK15-NEXT: [[_TMP4:%.*]] = alloca %struct.S.1*, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK15-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 +// CHECK15-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP12:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP11]], align 4 +// CHECK15-NEXT: store %struct.S.1* [[TMP12]], %struct.S.1** [[TMP]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK15-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i32 8, i1 false) -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK15-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 -// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK15-NEXT: [[TMP15:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK15-NEXT: [[TMP16:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP15]], i8* align 4 [[TMP16]], i32 8, i1 false) +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP17:%.*]] = bitcast [2 x %struct.S.1]* [[TMP10]] to %struct.S.1* +// CHECK15-NEXT: [[TMP18:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN]], [[TMP18]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK15: omp.arraycpy.body: -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK15-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP17]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK15-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] -// CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK15: omp.arraycpy.done4: -// CHECK15-NEXT: [[TMP9:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK15-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP9]], %struct.St* noundef [[AGG_TMP6]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR5]] -// CHECK15-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP7]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP18]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK15: omp.arraycpy.done2: +// CHECK15-NEXT: [[TMP19:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK15-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP19]], %struct.St* noundef [[AGG_TMP3]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR5]] +// CHECK15-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP4]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP21]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP22]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK15-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP19]] -// CHECK15-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 [[TMP21]] -// CHECK15-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX9]] to i8* -// CHECK15-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false) +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP29]] +// CHECK15-NEXT: store i32 [[TMP28]], i32* [[ARRAYIDX]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP4]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 [[TMP31]] +// CHECK15-NEXT: [[TMP32:%.*]] = bitcast %struct.S.1* [[ARRAYIDX6]] to i8* +// CHECK15-NEXT: [[TMP33:%.*]] = bitcast %struct.S.1* [[TMP30]] to i8* +// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i32 4, i1 false) // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK15-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], 1 +// CHECK15-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK15-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN11]], i32 2 +// CHECK15-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK15-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN8]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP27]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK15-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK15: arraydestroy.done12: +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP37]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK15-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK15: arraydestroy.done9: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK15-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 { +// CHECK15-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 { // CHECK15-NEXT: entry: -// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK15-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK15-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK15-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR5]] +// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK15-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK15-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK15-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR5]] // CHECK15-NEXT: ret void // // @@ -4072,25 +4310,25 @@ // // // CHECK15-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK15-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 { +// CHECK15-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 { // CHECK15-NEXT: entry: -// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK15-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK15-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK15-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK15-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@_ZN1SIiEC2ERKS0_2St -// CHECK15-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 { +// CHECK15-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 { // CHECK15-NEXT: entry: -// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK15-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK15-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK15-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 4 -// CHECK15-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK15-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 4 -// CHECK15-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK15-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK15-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK15-NEXT: store %struct.S.1* [[S]], %struct.S.1** [[S_ADDR]], align 4 +// CHECK15-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK15-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.S.1*, %struct.S.1** [[S_ADDR]], align 4 +// CHECK15-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[TMP0]], i32 0, i32 0 // CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[F2]], align 4 // CHECK15-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.St* [[T]], i32 0, i32 0 // CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 @@ -4106,9 +4344,7 @@ // CHECK17-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK17-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 // CHECK17-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 // CHECK17-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 @@ -4116,123 +4352,130 @@ // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK17-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load volatile i32, i32* [[TMP2]], align 4 -// CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK17-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* @g, align 4 +// CHECK17-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 -// CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]]) +// CHECK17-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK17-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[G_ADDR]] to i32* -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* -// CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK17-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 +// CHECK17-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[G]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], i32* [[G1]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK17-NEXT: store i32 [[TMP6]], i32* [[SIVAR]], align 4 +// CHECK17-NEXT: store i32* [[G1]], i32** [[TMP]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] -// CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[G_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK17-NEXT: [[TMP14:%.*]] = load volatile i32, i32* [[TMP13]], align 4 -// CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[G1_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 4 -// CHECK17-NEXT: [[CONV7:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]) +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK17-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK17-NEXT: store i64 [[TMP17]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP19]], i64* [[TMP18]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP21]], i64* [[TMP20]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[G]], align 4 +// CHECK17-NEXT: store i32 [[TMP23]], i32* [[TMP22]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* @g, align 4 +// CHECK17-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK17-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 @@ -4241,75 +4484,84 @@ // CHECK17-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[G_ADDR]] to i32* -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* -// CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK17-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 +// CHECK17-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], i32* [[G]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], i32* [[G1]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 8 +// CHECK17-NEXT: store i32 [[TMP10]], i32* [[SIVAR]], align 4 +// CHECK17-NEXT: store i32* [[G1]], i32** [[TMP]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP11]] to i32 +// CHECK17-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 +// CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: store i32 1, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK17-NEXT: store volatile i32 1, i32* [[TMP10]], align 4 -// CHECK17-NEXT: store i32 2, i32* [[CONV2]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 0 -// CHECK17-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK17-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8 -// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 2 -// CHECK17-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8 +// CHECK17-NEXT: store i32 1, i32* [[G]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK17-NEXT: store volatile i32 1, i32* [[TMP21]], align 4 +// CHECK17-NEXT: store i32 2, i32* [[SIVAR]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 0 +// CHECK17-NEXT: store i32* [[G]], i32** [[TMP22]], align 8 +// CHECK17-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK17-NEXT: store i32* [[TMP24]], i32** [[TMP23]], align 8 +// CHECK17-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 2 +// CHECK17-NEXT: store i32* [[SIVAR]], i32** [[TMP25]], align 8 // CHECK17-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR3:[0-9]+]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK17-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]]) // CHECK17-NEXT: ret void // diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp @@ -132,77 +132,93 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l48 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -212,139 +228,161 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l51 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP16]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -354,43 +392,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: call void @_Z9gtid_testv() @@ -398,14 +442,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -478,77 +522,93 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l76 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -558,43 +618,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn4v() @@ -602,96 +668,112 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP16]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -701,43 +783,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn5v() @@ -745,14 +833,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -760,103 +848,120 @@ // CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK1-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[CONV1]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[TMP0]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK1-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP19]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -866,43 +971,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn6v() @@ -910,14 +1021,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -979,77 +1090,93 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l60 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1059,43 +1186,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn1v() @@ -1103,96 +1236,112 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l64 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..12 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.11*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TMP16]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1202,43 +1351,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn2v() @@ -1246,14 +1401,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -1261,103 +1416,120 @@ // CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 1 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK1-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[CONV1]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[TMP0]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.13*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK1-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP19]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1367,43 +1539,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn3v() @@ -1411,14 +1589,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp @@ -195,10 +195,7 @@ // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], i64* [[SVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SFVAR]], i64* [[SFVAR_ADDR]], align 8 @@ -208,260 +205,277 @@ // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[SFVAR_ADDR]] to float* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_ADDR]] to double* // CHECK1-NEXT: store double* [[CONV]], double** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load volatile double, double* [[TMP0]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to double* -// CHECK1-NEXT: store double [[TMP1]], double* [[CONV4]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load float, float* [[CONV2]], align 4 -// CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[SFVAR_CASTED]] to float* -// CHECK1-NEXT: store float [[TMP5]], float* [[CONV6]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load double, double* [[CONV3]], align 8 -// CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[G_CASTED]] to double* -// CHECK1-NEXT: store double [[TMP7]], double* [[CONV7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load volatile double, double* [[TMP1]], align 8 +// CHECK1-NEXT: store double [[TMP2]], double* [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[CONV2]], align 4 +// CHECK1-NEXT: store float [[TMP6]], float* [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double, double* [[CONV3]], align 8 +// CHECK1-NEXT: store double [[TMP8]], double* [[TMP7]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SVAR:%.*]], i64 noundef [[SFVAR:%.*]], i64 noundef [[G:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G5:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G16:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP7:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR9:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[G2:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G13:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SVAR]], i64* [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SFVAR]], i64* [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[G1_ADDR]] to double* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[SFVAR_ADDR]] to float* -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_ADDR]] to double* -// CHECK1-NEXT: store double* [[CONV]], double** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load double, double* [[TMP1]], align 8 +// CHECK1-NEXT: store double [[TMP2]], double* [[G1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[TMP5]], align 4 +// CHECK1-NEXT: store float [[TMP6]], float* [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double, double* [[TMP7]], align 8 +// CHECK1-NEXT: store double [[TMP8]], double* [[G]], align 8 +// CHECK1-NEXT: store double* [[G1]], double** [[TMP]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: store double* [[G16]], double** [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[G13]], double** [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = load double*, double** [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load volatile double, double* [[TMP12]], align 8 -// CHECK1-NEXT: [[CONV11:%.*]] = bitcast i64* [[G1_CASTED]] to double* -// CHECK1-NEXT: store double [[TMP13]], double* [[CONV11]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK1-NEXT: [[CONV12:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP15]], i32* [[CONV12]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load float, float* [[SFVAR9]], align 4 -// CHECK1-NEXT: [[CONV13:%.*]] = bitcast i64* [[SFVAR_CASTED]] to float* -// CHECK1-NEXT: store float [[TMP17]], float* [[CONV13]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load double, double* [[G5]], align 8 -// CHECK1-NEXT: [[CONV14:%.*]] = bitcast i64* [[G_CASTED]] to double* -// CHECK1-NEXT: store double [[TMP19]], double* [[CONV14]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// CHECK1-NEXT: store i64 [[TMP18]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK1-NEXT: store i64 [[TMP20]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP22]], i64* [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP24]], i64* [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP26:%.*]] = load double*, double** [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load volatile double, double* [[TMP26]], align 8 +// CHECK1-NEXT: store double [[TMP27]], double* [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[SVAR5]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load float, float* [[SFVAR6]], align 4 +// CHECK1-NEXT: store float [[TMP31]], float* [[TMP30]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP33:%.*]] = load double, double* [[G2]], align 8 +// CHECK1-NEXT: store double [[TMP33]], double* [[TMP32]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP25:%.*]] = load double, double* [[G5]], align 8 -// CHECK1-NEXT: store volatile double [[TMP25]], double* [[CONV3]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load double*, double** [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load double, double* [[TMP26]], align 8 -// CHECK1-NEXT: store volatile double [[TMP27]], double* [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK1-NEXT: store i32 [[TMP28]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load float, float* [[SFVAR9]], align 4 -// CHECK1-NEXT: store float [[TMP29]], float* [[CONV2]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load double, double* [[G2]], align 8 +// CHECK1-NEXT: store volatile double [[TMP38]], double* [[G]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = load double*, double** [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load double, double* [[TMP39]], align 8 +// CHECK1-NEXT: store volatile double [[TMP40]], double* [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[SVAR5]], align 4 +// CHECK1-NEXT: store i32 [[TMP41]], i32* [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load float, float* [[SFVAR6]], align 4 +// CHECK1-NEXT: store float [[TMP42]], float* [[SFVAR]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SVAR:%.*]], i64 noundef [[SFVAR:%.*]], i64 noundef [[G:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G7:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G18:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP9:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR10:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR11:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP5:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SVAR]], i64* [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SFVAR]], i64* [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[G1_ADDR]] to double* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[SFVAR_ADDR]] to float* -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_ADDR]] to double* -// CHECK1-NEXT: store double* [[CONV]], double** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load double, double* [[TMP5]], align 8 +// CHECK1-NEXT: store double [[TMP6]], double* [[G1]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load float, float* [[TMP9]], align 4 +// CHECK1-NEXT: store float [[TMP10]], float* [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load double, double* [[TMP11]], align 8 +// CHECK1-NEXT: store double [[TMP12]], double* [[G]], align 8 +// CHECK1-NEXT: store double* [[G1]], double** [[TMP]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: store double* [[G18]], double** [[_TMP9]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[G14]], double** [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load double*, double** [[_TMP9]], align 8 -// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP11]], align 8 -// CHECK1-NEXT: store i32 3, i32* [[SVAR10]], align 4 -// CHECK1-NEXT: store float 4.000000e+00, float* [[SFVAR11]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double* [[G7]], double** [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP14:%.*]] = load double*, double** [[_TMP9]], align 8 -// CHECK1-NEXT: store double* [[TMP14]], double** [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store i32* [[SVAR10]], i32** [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store float* [[SFVAR11]], float** [[TMP16]], align 8 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) +// CHECK1-NEXT: [[TMP24:%.*]] = load double*, double** [[_TMP5]], align 8 +// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP24]], align 8 +// CHECK1-NEXT: store i32 3, i32* [[SVAR6]], align 4 +// CHECK1-NEXT: store float 4.000000e+00, float* [[SFVAR7]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store double* [[G3]], double** [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load double*, double** [[_TMP5]], align 8 +// CHECK1-NEXT: store double* [[TMP27]], double** [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[SVAR6]], i32** [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[SFVAR7]], float** [[TMP29]], align 8 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP20:%.*]] = load double, double* [[G7]], align 8 -// CHECK1-NEXT: store volatile double [[TMP20]], double* [[CONV3]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load double*, double** [[_TMP9]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load double, double* [[TMP21]], align 8 -// CHECK1-NEXT: store volatile double [[TMP22]], double* [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[SVAR10]], align 4 -// CHECK1-NEXT: store i32 [[TMP23]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load float, float* [[SFVAR11]], align 4 -// CHECK1-NEXT: store float [[TMP24]], float* [[CONV2]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load double, double* [[G3]], align 8 +// CHECK1-NEXT: store volatile double [[TMP33]], double* [[G]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load double*, double** [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load double, double* [[TMP34]], align 8 +// CHECK1-NEXT: store volatile double [[TMP35]], double* [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[SVAR6]], align 4 +// CHECK1-NEXT: store i32 [[TMP36]], i32* [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load float, float* [[SFVAR7]], align 4 +// CHECK1-NEXT: store float [[TMP37]], float* [[SFVAR]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -500,8 +514,7 @@ // CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[G_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], i32* [[SVAR_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SFVAR]], i32* [[SFVAR_ADDR]], align 4 @@ -510,27 +523,29 @@ // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[SFVAR_ADDR]] to float* // CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[G_ADDR]], align 4 // CHECK3-NEXT: store double* [[TMP0]], double** [[TMP]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load float, float* [[CONV]], align 4 -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[SFVAR_CASTED]] to float* -// CHECK3-NEXT: store float [[TMP5]], float* [[CONV1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[SFVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, double*, i32, i32, double*)* @.omp_outlined. to void (i32*, i32*, ...)*), double* [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], double* [[TMP1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load double*, double** [[TMP]], align 4 +// CHECK3-NEXT: store double* [[TMP3]], double** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[SVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP7:%.*]] = load float, float* [[CONV]], align 4 +// CHECK3-NEXT: store float [[TMP7]], float* [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store double* [[TMP1]], double** [[TMP8]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], i32 noundef [[SVAR:%.*]], i32 noundef [[SFVAR:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -538,104 +553,120 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[SVAR3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR4:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SVAR]], i32* [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SFVAR]], i32* [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[SFVAR_ADDR]] to float* -// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[G_ADDR]], align 4 -// CHECK3-NEXT: store double* [[TMP0]], double** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load float, float* [[TMP5]], align 4 +// CHECK3-NEXT: store float [[TMP6]], float* [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double*, double** [[TMP7]], align 4 +// CHECK3-NEXT: store double* [[TMP2]], double** [[TMP]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP]], align 4 -// CHECK3-NEXT: store double* [[G13]], double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load double*, double** [[TMP]], align 4 +// CHECK3-NEXT: store double* [[G1]], double** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], i32* [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load float, float* [[SFVAR6]], align 4 -// CHECK3-NEXT: [[CONV8:%.*]] = bitcast i32* [[SFVAR_CASTED]] to float* -// CHECK3-NEXT: store float [[TMP15]], float* [[CONV8]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[SFVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, double*, i32, i32, double*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP10]], i32 [[TMP11]], double* [[TMP12]], i32 [[TMP14]], i32 [[TMP16]], double* [[G2]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP24:%.*]] = load double*, double** [[_TMP2]], align 4 +// CHECK3-NEXT: store double* [[TMP24]], double** [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[SVAR3]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], i32* [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load float, float* [[SFVAR4]], align 4 +// CHECK3-NEXT: store float [[TMP28]], float* [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store double* [[G]], double** [[TMP29]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK3-NEXT: br i1 [[TMP33]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP21:%.*]] = load double, double* [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP21]], double* [[TMP1]], align 8 -// CHECK3-NEXT: [[TMP22:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load double, double* [[TMP22]], align 4 -// CHECK3-NEXT: store volatile double [[TMP23]], double* [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP24]], i32* [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load float, float* [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP25]], float* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load double, double* [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP34]], double* [[TMP8]], align 8 +// CHECK3-NEXT: [[TMP35:%.*]] = load double*, double** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load double, double* [[TMP35]], align 4 +// CHECK3-NEXT: store volatile double [[TMP36]], double* [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[SVAR3]], align 4 +// CHECK3-NEXT: store i32 [[TMP37]], i32* [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load float, float* [[SFVAR4]], align 4 +// CHECK3-NEXT: store float [[TMP38]], float* [[SFVAR]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], i32 noundef [[SVAR:%.*]], i32 noundef [[SFVAR:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -643,101 +674,110 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[SVAR3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR4:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SVAR]], i32* [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SFVAR]], i32* [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[SFVAR_ADDR]] to float* -// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[G_ADDR]], align 4 -// CHECK3-NEXT: store double* [[TMP0]], double** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load double*, double** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load float, float* [[TMP9]], align 4 +// CHECK3-NEXT: store float [[TMP10]], float* [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load double*, double** [[TMP11]], align 4 +// CHECK3-NEXT: store double* [[TMP6]], double** [[TMP]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP]], align 4 -// CHECK3-NEXT: store double* [[G13]], double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load double*, double** [[TMP]], align 4 +// CHECK3-NEXT: store double* [[G1]], double** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP13]], align 4 -// CHECK3-NEXT: store i32 3, i32* [[SVAR5]], align 4 -// CHECK3-NEXT: store float 4.000000e+00, float* [[SFVAR6]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double* [[G2]], double** [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP16:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: store double* [[TMP16]], double** [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store i32* [[SVAR5]], i32** [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store float* [[SFVAR6]], float** [[TMP18]], align 4 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) +// CHECK3-NEXT: [[TMP24:%.*]] = load double*, double** [[_TMP2]], align 4 +// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP24]], align 4 +// CHECK3-NEXT: store i32 3, i32* [[SVAR3]], align 4 +// CHECK3-NEXT: store float 4.000000e+00, float* [[SFVAR4]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G]], double** [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load double*, double** [[_TMP2]], align 4 +// CHECK3-NEXT: store double* [[TMP27]], double** [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SVAR3]], i32** [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[SFVAR4]], float** [[TMP29]], align 4 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK3-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP22:%.*]] = load double, double* [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP22]], double* [[TMP1]], align 8 -// CHECK3-NEXT: [[TMP23:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load double, double* [[TMP23]], align 4 -// CHECK3-NEXT: store volatile double [[TMP24]], double* [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP25]], i32* [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load float, float* [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP26]], float* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load double, double* [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP33]], double* [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP34:%.*]] = load double*, double** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load double, double* [[TMP34]], align 4 +// CHECK3-NEXT: store volatile double [[TMP35]], double* [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[SVAR3]], align 4 +// CHECK3-NEXT: store i32 [[TMP36]], i32* [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load float, float* [[SFVAR4]], align 4 +// CHECK3-NEXT: store float [[TMP37]], float* [[SFVAR]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -890,8 +930,7 @@ // CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 // CHECK5-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 -// CHECK5-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK5-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK5-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK5-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 @@ -903,63 +942,70 @@ // CHECK5-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK5-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[CONV3]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i64 [[TMP4]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP5]], i64 [[TMP7]]) +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP6]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK5-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK5-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK5-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK5-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK5-NEXT: [[_TMP7:%.*]] = alloca %struct.S*, align 8 -// CHECK5-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK5-NEXT: [[_TMP3:%.*]] = alloca %struct.S*, align 8 +// CHECK5-NEXT: [[SVAR4:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[SVAR]], i64* [[SVAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK5-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* -// CHECK5-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 +// CHECK5-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK5-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 8 +// CHECK5-NEXT: store i32 [[TMP10]], i32* [[SVAR]], align 4 +// CHECK5-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: @@ -969,160 +1015,178 @@ // CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK5-NEXT: store %struct.S* [[VAR6]], %struct.S** [[_TMP7]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK5-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP3]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK5-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK5-NEXT: [[CONV10:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP15]], i32* [[CONV10]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 8 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK5-NEXT: [[CONV11:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP18]], i32* [[CONV11]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP12]], i64 [[TMP14]], [2 x i32]* [[VEC4]], i64 [[TMP16]], [2 x %struct.S]* [[S_ARR5]], %struct.S* [[TMP17]], i64 [[TMP19]]) +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK5-NEXT: store i64 [[TMP20]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK5-NEXT: store i64 [[TMP22]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP24]], i64* [[TMP23]], align 8 +// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: store i64 [[TMP26]], i64* [[TMP25]], align 8 +// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP27]], align 8 +// CHECK5-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, i32* [[T_VAR2]], align 4 +// CHECK5-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 8 +// CHECK5-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK5-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP30]], align 8 +// CHECK5-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK5-NEXT: [[TMP32:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 8 +// CHECK5-NEXT: store %struct.S* [[TMP32]], %struct.S** [[TMP31]], align 8 +// CHECK5-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK5-NEXT: [[TMP34:%.*]] = load i32, i32* [[SVAR4]], align 4 +// CHECK5-NEXT: store i32 [[TMP34]], i32* [[TMP33]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK5-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK5-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) +// CHECK5-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK5-NEXT: br i1 [[TMP40]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK5-NEXT: store i32 [[TMP26]], i32* [[CONV]], align 4 -// CHECK5-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK5-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP27]], i8* align 4 [[TMP28]], i64 8, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP1]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP29:%.*]] = bitcast [2 x %struct.S]* [[S_ARR5]] to %struct.S* -// CHECK5-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN12]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN12]], [[TMP30]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP41:%.*]] = load i32, i32* [[T_VAR2]], align 4 +// CHECK5-NEXT: store i32 [[TMP41]], i32* [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP42:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK5-NEXT: [[TMP43:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP42]], i8* align 4 [[TMP43]], i64 8, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP44:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK5-NEXT: [[TMP45:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN6]], [[TMP45]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP29]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[TMP31:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK5-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP31]], i8* align 4 [[TMP32]], i64 4, i1 false) +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP44]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[TMP46:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK5-NEXT: [[TMP47:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP46]], i8* align 4 [[TMP47]], i64 4, i1 false) // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP30]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done13: -// CHECK5-NEXT: [[TMP33:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 8 -// CHECK5-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[TMP3]] to i8* -// CHECK5-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[TMP33]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i64 4, i1 false) -// CHECK5-NEXT: [[TMP36:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK5-NEXT: store i32 [[TMP36]], i32* [[CONV1]], align 4 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP45]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done7: +// CHECK5-NEXT: [[TMP48:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 8 +// CHECK5-NEXT: [[TMP49:%.*]] = bitcast %struct.S* [[TMP11]] to i8* +// CHECK5-NEXT: [[TMP50:%.*]] = bitcast %struct.S* [[TMP48]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP49]], i8* align 4 [[TMP50]], i64 4, i1 false) +// CHECK5-NEXT: [[TMP51:%.*]] = load i32, i32* [[SVAR4]], align 4 +// CHECK5-NEXT: store i32 [[TMP51]], i32* [[SVAR]], align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN14]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK5-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP37]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP52]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done15: +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done9: // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK5-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[T_VAR5:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[VEC6:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR7:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK5-NEXT: [[VAR8:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK5-NEXT: [[_TMP9:%.*]] = alloca %struct.S*, align 8 -// CHECK5-NEXT: [[SVAR10:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK5-NEXT: [[_TMP4:%.*]] = alloca %struct.S*, align 8 +// CHECK5-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[SVAR]], i64* [[SVAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK5-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* -// CHECK5-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 +// CHECK5-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK5-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP10:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK5-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[TMP11]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 7 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 8 +// CHECK5-NEXT: store i32 [[TMP14]], i32* [[SVAR]], align 4 +// CHECK5-NEXT: store %struct.S* [[TMP12]], %struct.S** [[TMP]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK5-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK5-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP16]] to i32 +// CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR7]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: @@ -1132,108 +1196,108 @@ // CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR8]]) -// CHECK5-NEXT: store %struct.S* [[VAR8]], %struct.S** [[_TMP9]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK5-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR5]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC6]], i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = load %struct.S*, %struct.S** [[_TMP9]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK5-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR7]], i64 0, i64 [[IDXPROM12]] -// CHECK5-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX13]] to i8* -// CHECK5-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[TMP16]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false) +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR3]], align 4 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK5-NEXT: store i32 [[TMP26]], i32* [[ARRAYIDX]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = load %struct.S*, %struct.S** [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK5-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM7]] +// CHECK5-NEXT: [[TMP30:%.*]] = bitcast %struct.S* [[ARRAYIDX8]] to i8* +// CHECK5-NEXT: [[TMP31:%.*]] = bitcast %struct.S* [[TMP28]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i64 4, i1 false) // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK5-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK5-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK5-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK5-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR5]], align 4 -// CHECK5-NEXT: store i32 [[TMP25]], i32* [[CONV]], align 4 -// CHECK5-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK5-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[VEC6]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 8, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN15:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP1]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP28:%.*]] = bitcast [2 x %struct.S]* [[S_ARR7]] to %struct.S* -// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN15]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN15]], [[TMP29]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE16:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP37:%.*]] = load i32, i32* [[T_VAR3]], align 4 +// CHECK5-NEXT: store i32 [[TMP37]], i32* [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP38:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK5-NEXT: [[TMP39:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i64 8, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP10]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP40:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK5-NEXT: [[TMP41:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN10]], [[TMP41]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP28]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN15]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[TMP30:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK5-NEXT: [[TMP31:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i64 4, i1 false) +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP40]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[TMP42:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK5-NEXT: [[TMP43:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP42]], i8* align 4 [[TMP43]], i64 4, i1 false) // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP29]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done16: -// CHECK5-NEXT: [[TMP32:%.*]] = load %struct.S*, %struct.S** [[_TMP9]], align 8 -// CHECK5-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[TMP5]] to i8* -// CHECK5-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[TMP32]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i64 4, i1 false) -// CHECK5-NEXT: [[TMP35:%.*]] = load i32, i32* [[SVAR10]], align 4 -// CHECK5-NEXT: store i32 [[TMP35]], i32* [[CONV1]], align 4 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP41]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done11: +// CHECK5-NEXT: [[TMP44:%.*]] = load %struct.S*, %struct.S** [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP45:%.*]] = bitcast %struct.S* [[TMP17]] to i8* +// CHECK5-NEXT: [[TMP46:%.*]] = bitcast %struct.S* [[TMP44]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP45]], i8* align 4 [[TMP46]], i64 4, i1 false) +// CHECK5-NEXT: [[TMP47:%.*]] = load i32, i32* [[SVAR5]], align 4 +// CHECK5-NEXT: store i32 [[TMP47]], i32* [[SVAR]], align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR8]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAY_BEGIN17:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR7]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN17]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK5-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN12]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP36]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP48]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN17]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE18:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done18: +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done13: // CHECK5-NEXT: ret void // // @@ -1251,33 +1315,33 @@ // CHECK5-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK5-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK5-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK5-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK5-NEXT: [[VAR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK5-NEXT: [[TMP:%.*]] = alloca %struct.S.2*, align 8 // CHECK5-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x i8*], align 8 // CHECK5-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x i8*], align 8 // CHECK5-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x i8*], align 8 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK5-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK5-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) -// CHECK5-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK5-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK5-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK5-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) -// CHECK5-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR]], align 8 -// CHECK5-NEXT: store %struct.S.0* [[TMP1]], %struct.S.0** [[TMP]], align 8 +// CHECK5-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i64 0, i64 0 +// CHECK5-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) +// CHECK5-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK5-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) +// CHECK5-NEXT: store %struct.S.2* [[TEST]], %struct.S.2** [[VAR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = load %struct.S.2*, %struct.S.2** [[VAR]], align 8 +// CHECK5-NEXT: store %struct.S.2* [[TMP1]], %struct.S.2** [[TMP]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_VAR]], align 4 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP2]], i32* [[CONV]], align 4 // CHECK5-NEXT: [[TMP3:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP]], align 8 // CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP6:%.*]] = bitcast i8** [[TMP5]] to [2 x i32]** // CHECK5-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP6]], align 8 @@ -1295,19 +1359,19 @@ // CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 // CHECK5-NEXT: store i8* null, i8** [[TMP14]], align 8 // CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK5-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to [2 x %struct.S.0]** -// CHECK5-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to [2 x %struct.S.2]** +// CHECK5-NEXT: store [2 x %struct.S.2]* [[S_ARR]], [2 x %struct.S.2]** [[TMP16]], align 8 // CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK5-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.0]** -// CHECK5-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP18]], align 8 +// CHECK5-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.2]** +// CHECK5-NEXT: store [2 x %struct.S.2]* [[S_ARR]], [2 x %struct.S.2]** [[TMP18]], align 8 // CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 // CHECK5-NEXT: store i8* null, i8** [[TMP19]], align 8 // CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK5-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to %struct.S.0** -// CHECK5-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[TMP21]], align 8 +// CHECK5-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to %struct.S.2** +// CHECK5-NEXT: store %struct.S.2* [[TMP4]], %struct.S.2** [[TMP21]], align 8 // CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK5-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.0** -// CHECK5-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[TMP23]], align 8 +// CHECK5-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.2** +// CHECK5-NEXT: store %struct.S.2* [[TMP4]], %struct.S.2** [[TMP23]], align 8 // CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 // CHECK5-NEXT: store i8* null, i8** [[TMP24]], align 8 // CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 @@ -1317,21 +1381,21 @@ // CHECK5-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 // CHECK5-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK5: omp_offload.failed: -// CHECK5-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l50([2 x i32]* [[VEC]], i64 [[TMP3]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP4]]) #[[ATTR4]] +// CHECK5-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l50([2 x i32]* [[VEC]], i64 [[TMP3]], [2 x %struct.S.2]* [[S_ARR]], %struct.S.2* [[TMP4]]) #[[ATTR4]] // CHECK5-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK5: omp_offload.cont: // CHECK5-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP29]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP29]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK5: arraydestroy.done2: -// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK5-NEXT: [[TMP30:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK5-NEXT: ret i32 [[TMP30]] // @@ -1371,400 +1435,426 @@ // // // CHECK5-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK5-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK5-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK5-NEXT: entry: -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK5-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK5-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK5-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK5-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK5-NEXT: entry: -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 // CHECK5-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK5-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) +// CHECK5-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l50 -// CHECK5-SAME: ([2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: ([2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.2]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 // CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK5-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 -// CHECK5-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.2]*, align 8 +// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK5-NEXT: [[TMP:%.*]] = alloca %struct.S.2*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK5-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK5-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 +// CHECK5-NEXT: store [2 x %struct.S.2]* [[S_ARR]], [2 x %struct.S.2]** [[S_ARR_ADDR]], align 8 +// CHECK5-NEXT: store %struct.S.2* [[VAR]], %struct.S.2** [[VAR_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK5-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i64 [[TMP4]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP5]]) +// CHECK5-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.2]*, [2 x %struct.S.2]** [[S_ARR_ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.S.2*, %struct.S.2** [[VAR_ADDR]], align 8 +// CHECK5-NEXT: store %struct.S.2* [[TMP2]], %struct.S.2** [[TMP]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store [2 x %struct.S.2]* [[TMP1]], [2 x %struct.S.2]** [[TMP6]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP]], align 8 +// CHECK5-NEXT: store %struct.S.2* [[TMP8]], %struct.S.2** [[TMP7]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK5-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[TMP:%.*]] = alloca %struct.S.2*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK5-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK5-NEXT: [[_TMP6:%.*]] = alloca %struct.S.0*, align 8 +// CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK5-NEXT: [[_TMP3:%.*]] = alloca %struct.S.2*, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK5-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 +// CHECK5-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK5-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.2]*, [2 x %struct.S.2]** [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP7]], align 8 +// CHECK5-NEXT: store %struct.S.2* [[TMP8]], %struct.S.2** [[TMP]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: -// CHECK5-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK5-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.2* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYCTOR_CUR]], i64 1 +// CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK5-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP6]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK5-NEXT: [[TMP9:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP]], align 8 +// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: store %struct.S.2* [[VAR]], %struct.S.2** [[_TMP3]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK5-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK5-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP15]], i32* [[CONV8]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP12]], i64 [[TMP14]], [2 x i32]* [[VEC3]], i64 [[TMP16]], [2 x %struct.S.0]* [[S_ARR4]], %struct.S.0* [[TMP17]]) +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// CHECK5-NEXT: store i64 [[TMP18]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK5-NEXT: store i64 [[TMP20]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP22]], i64* [[TMP21]], align 8 +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: store i64 [[TMP24]], i64* [[TMP23]], align 8 +// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP25]], align 8 +// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR2]], align 4 +// CHECK5-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 8 +// CHECK5-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: store [2 x %struct.S.2]* [[S_ARR]], [2 x %struct.S.2]** [[TMP28]], align 8 +// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP30:%.*]] = load %struct.S.2*, %struct.S.2** [[_TMP3]], align 8 +// CHECK5-NEXT: store %struct.S.2* [[TMP30]], %struct.S.2** [[TMP29]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK5-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK5-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK5-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK5-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK5-NEXT: store i32 [[TMP24]], i32* [[CONV]], align 4 -// CHECK5-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK5-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP25]], i8* align 4 [[TMP26]], i64 8, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP1]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP27:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR4]] to %struct.S.0* -// CHECK5-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN9]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN9]], [[TMP28]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP37:%.*]] = load i32, i32* [[T_VAR2]], align 4 +// CHECK5-NEXT: store i32 [[TMP37]], i32* [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP38:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK5-NEXT: [[TMP39:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i64 8, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP40:%.*]] = bitcast [2 x %struct.S.2]* [[S_ARR]] to %struct.S.2* +// CHECK5-NEXT: [[TMP41:%.*]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN5]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.2* [[ARRAY_BEGIN5]], [[TMP41]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP27]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[TMP29:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK5-NEXT: [[TMP30:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP29]], i8* align 4 [[TMP30]], i64 4, i1 false) -// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP28]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done10: -// CHECK5-NEXT: [[TMP31:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 8 -// CHECK5-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[TMP3]] to i8* -// CHECK5-NEXT: [[TMP33:%.*]] = bitcast %struct.S.0* [[TMP31]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i64 4, i1 false) +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP40]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.2* [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[TMP42:%.*]] = bitcast %struct.S.2* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK5-NEXT: [[TMP43:%.*]] = bitcast %struct.S.2* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP42]], i8* align 4 [[TMP43]], i64 4, i1 false) +// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.2* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP41]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done6: +// CHECK5-NEXT: [[TMP44:%.*]] = load %struct.S.2*, %struct.S.2** [[_TMP3]], align 8 +// CHECK5-NEXT: [[TMP45:%.*]] = bitcast %struct.S.2* [[TMP9]] to i8* +// CHECK5-NEXT: [[TMP46:%.*]] = bitcast %struct.S.2* [[TMP44]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP45]], i8* align 4 [[TMP46]], i64 4, i1 false) // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN11]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK5-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN7]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP34]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done12: +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP47]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done8: // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK5-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[TMP:%.*]] = alloca %struct.S.2*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[T_VAR4:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[VEC5:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR6:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK5-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK5-NEXT: [[_TMP8:%.*]] = alloca %struct.S.0*, align 8 +// CHECK5-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK5-NEXT: [[_TMP4:%.*]] = alloca %struct.S.2*, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK5-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 +// CHECK5-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK5-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load [2 x %struct.S.2]*, [2 x %struct.S.2]** [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP12:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP11]], align 8 +// CHECK5-NEXT: store %struct.S.2* [[TMP12]], %struct.S.2** [[TMP]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK5-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK5-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR6]], i32 0, i32 0 -// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: -// CHECK5-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK5-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.2* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYCTOR_CUR]], i64 1 +// CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR7]]) -// CHECK5-NEXT: store %struct.S.0* [[VAR7]], %struct.S.0** [[_TMP8]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP]], align 8 +// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: store %struct.S.2* [[VAR]], %struct.S.2** [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK5-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR4]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC5]], i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK5-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR6]], i64 0, i64 [[IDXPROM10]] -// CHECK5-NEXT: [[TMP18:%.*]] = bitcast %struct.S.0* [[ARRAYIDX11]] to i8* -// CHECK5-NEXT: [[TMP19:%.*]] = bitcast %struct.S.0* [[TMP16]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false) +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR3]], align 4 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK5-NEXT: store i32 [[TMP24]], i32* [[ARRAYIDX]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = load %struct.S.2*, %struct.S.2** [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK5-NEXT: [[TMP28:%.*]] = bitcast %struct.S.2* [[ARRAYIDX7]] to i8* +// CHECK5-NEXT: [[TMP29:%.*]] = bitcast %struct.S.2* [[TMP26]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i64 4, i1 false) // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK5-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK5-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK5-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK5-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR4]], align 4 -// CHECK5-NEXT: store i32 [[TMP25]], i32* [[CONV]], align 4 -// CHECK5-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK5-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[VEC5]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 8, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP1]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP28:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR6]] to %struct.S.0* -// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN13]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN13]], [[TMP29]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP35:%.*]] = load i32, i32* [[T_VAR3]], align 4 +// CHECK5-NEXT: store i32 [[TMP35]], i32* [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP36:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK5-NEXT: [[TMP37:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP36]], i8* align 4 [[TMP37]], i64 8, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[TMP10]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP38:%.*]] = bitcast [2 x %struct.S.2]* [[S_ARR]] to %struct.S.2* +// CHECK5-NEXT: [[TMP39:%.*]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN9]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.2* [[ARRAY_BEGIN9]], [[TMP39]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP28]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN13]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[TMP30:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK5-NEXT: [[TMP31:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i64 4, i1 false) -// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP29]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done14: -// CHECK5-NEXT: [[TMP32:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 8 -// CHECK5-NEXT: [[TMP33:%.*]] = bitcast %struct.S.0* [[TMP5]] to i8* -// CHECK5-NEXT: [[TMP34:%.*]] = bitcast %struct.S.0* [[TMP32]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i64 4, i1 false) +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP38]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.2* [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[TMP40:%.*]] = bitcast %struct.S.2* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK5-NEXT: [[TMP41:%.*]] = bitcast %struct.S.2* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP40]], i8* align 4 [[TMP41]], i64 4, i1 false) +// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.2* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP39]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done10: +// CHECK5-NEXT: [[TMP42:%.*]] = load %struct.S.2*, %struct.S.2** [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP43:%.*]] = bitcast %struct.S.2* [[TMP15]] to i8* +// CHECK5-NEXT: [[TMP44:%.*]] = bitcast %struct.S.2* [[TMP42]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP43]], i8* align 4 [[TMP44]], i64 4, i1 false) // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAY_BEGIN15:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR6]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN15]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK5-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN11]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP35]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN15]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE16:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done16: +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP45]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done12: // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK5-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK5-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK5-NEXT: entry: -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK5-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK5-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: call void @_ZN1SIiED2Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK5-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK5-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK5-NEXT: entry: -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK5-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK5-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], %struct.S.2* [[THIS1]], i32 0, i32 0 // CHECK5-NEXT: store i32 0, i32* [[F]], align 4 // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK5-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK5-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK5-NEXT: entry: -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 // CHECK5-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], %struct.S.2* [[THIS1]], i32 0, i32 0 // CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK5-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK5-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK5-NEXT: entry: -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK5-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK5-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 // CHECK5-NEXT: ret void // // @@ -1913,8 +2003,7 @@ // CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 // CHECK7-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 -// CHECK7-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK7-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK7-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK7-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 @@ -1924,27 +2013,31 @@ // CHECK7-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 // CHECK7-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP3]], i32* [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[SVAR_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP6]], i32* [[SVAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32 [[TMP4]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP5]], i32 [[TMP7]]) +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP6]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK7-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[SVAR_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK7-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK7-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK7-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1952,31 +2045,38 @@ // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK7-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK7-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK7-NEXT: [[_TMP6:%.*]] = alloca %struct.S*, align 4 -// CHECK7-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK7-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK7-NEXT: [[_TMP3:%.*]] = alloca %struct.S*, align 4 +// CHECK7-NEXT: [[SVAR4:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[SVAR]], i32* [[SVAR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 4 +// CHECK7-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], i32* [[SVAR]], align 4 +// CHECK7-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 // CHECK7-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK7: arrayctor.loop: @@ -1986,116 +2086,128 @@ // CHECK7-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK7-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK7: arrayctor.cont: -// CHECK7-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK7-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK7-NEXT: store %struct.S* [[VAR5]], %struct.S** [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK7-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK7-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK7-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK7-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK7: omp.inner.for.cond.cleanup: // CHECK7-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK7-NEXT: store i32 [[TMP13]], i32* [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[SVAR7]], align 4 -// CHECK7-NEXT: store i32 [[TMP16]], i32* [[SVAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP11]], i32 [[TMP12]], [2 x i32]* [[VEC3]], i32 [[TMP14]], [2 x %struct.S]* [[S_ARR4]], %struct.S* [[TMP15]], i32 [[TMP17]]) +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 4 +// CHECK7-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP24]], i32* [[TMP23]], align 4 +// CHECK7-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP25]], align 4 +// CHECK7-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR2]], align 4 +// CHECK7-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 4 +// CHECK7-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK7-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP28]], align 4 +// CHECK7-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP30:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 4 +// CHECK7-NEXT: store %struct.S* [[TMP30]], %struct.S** [[TMP29]], align 4 +// CHECK7-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK7-NEXT: [[TMP32:%.*]] = load i32, i32* [[SVAR4]], align 4 +// CHECK7-NEXT: store i32 [[TMP32]], i32* [[TMP31]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK7-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK7-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK7-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK7-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK7-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK7-NEXT: br i1 [[TMP38]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK7: .omp.lastprivate.then: -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK7-NEXT: store i32 [[TMP24]], i32* [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK7-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP25]], i8* align 4 [[TMP26]], i32 8, i1 false) -// CHECK7-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP1]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP27:%.*]] = bitcast [2 x %struct.S]* [[S_ARR4]] to %struct.S* -// CHECK7-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN9]], i32 2 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN9]], [[TMP28]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK7-NEXT: [[TMP39:%.*]] = load i32, i32* [[T_VAR2]], align 4 +// CHECK7-NEXT: store i32 [[TMP39]], i32* [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP40:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK7-NEXT: [[TMP41:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP40]], i8* align 4 [[TMP41]], i32 8, i1 false) +// CHECK7-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP42:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK7-NEXT: [[TMP43:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i32 2 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN6]], [[TMP43]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP27]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[TMP29:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK7-NEXT: [[TMP30:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP29]], i8* align 4 [[TMP30]], i32 4, i1 false) +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP42]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[TMP44:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK7-NEXT: [[TMP45:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP44]], i8* align 4 [[TMP45]], i32 4, i1 false) // CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP28]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK7: omp.arraycpy.done10: -// CHECK7-NEXT: [[TMP31:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[TMP3]] to i8* -// CHECK7-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[TMP31]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i32 4, i1 false) -// CHECK7-NEXT: [[TMP34:%.*]] = load i32, i32* [[SVAR7]], align 4 -// CHECK7-NEXT: store i32 [[TMP34]], i32* [[SVAR_ADDR]], align 4 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP43]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK7: omp.arraycpy.done7: +// CHECK7-NEXT: [[TMP46:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP47:%.*]] = bitcast %struct.S* [[TMP11]] to i8* +// CHECK7-NEXT: [[TMP48:%.*]] = bitcast %struct.S* [[TMP46]] to i8* +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP47]], i8* align 4 [[TMP48]], i32 4, i1 false) +// CHECK7-NEXT: [[TMP49:%.*]] = load i32, i32* [[SVAR4]], align 4 +// CHECK7-NEXT: store i32 [[TMP49]], i32* [[SVAR]], align 4 // CHECK7-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK7: .omp.lastprivate.done: -// CHECK7-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN11]], i32 2 +// CHECK7-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK7-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i32 2 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP35]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP50]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK7-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK7: arraydestroy.done12: +// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK7: arraydestroy.done9: // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK7-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK7-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2104,34 +2216,44 @@ // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK7-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK7-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK7-NEXT: [[_TMP6:%.*]] = alloca %struct.S*, align 4 -// CHECK7-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK7-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK7-NEXT: [[_TMP3:%.*]] = alloca %struct.S*, align 4 +// CHECK7-NEXT: [[SVAR4:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[SVAR]], i32* [[SVAR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 4 +// CHECK7-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[TMP11]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], i32* [[SVAR]], align 4 +// CHECK7-NEXT: store %struct.S* [[TMP12]], %struct.S** [[TMP]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 // CHECK7-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK7: arrayctor.loop: @@ -2141,106 +2263,106 @@ // CHECK7-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK7-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK7: arrayctor.cont: -// CHECK7-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK7-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK7-NEXT: store %struct.S* [[VAR5]], %struct.S** [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK7-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK7-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK7-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK7-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK7-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK7: omp.inner.for.cond.cleanup: // CHECK7-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC3]], i32 0, i32 [[TMP15]] -// CHECK7-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 [[TMP17]] -// CHECK7-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX9]] to i8* -// CHECK7-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[TMP16]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false) +// CHECK7-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR2]], align 4 +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP27]] +// CHECK7-NEXT: store i32 [[TMP26]], i32* [[ARRAYIDX]], align 4 +// CHECK7-NEXT: [[TMP28:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP29]] +// CHECK7-NEXT: [[TMP30:%.*]] = bitcast %struct.S* [[ARRAYIDX6]] to i8* +// CHECK7-NEXT: [[TMP31:%.*]] = bitcast %struct.S* [[TMP28]] to i8* +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i32 4, i1 false) // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK7-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK7-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK7-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK7-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK7-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK7-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK7-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK7: .omp.lastprivate.then: -// CHECK7-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK7-NEXT: store i32 [[TMP25]], i32* [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK7-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 8, i1 false) -// CHECK7-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP1]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP28:%.*]] = bitcast [2 x %struct.S]* [[S_ARR4]] to %struct.S* -// CHECK7-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN11]], i32 2 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN11]], [[TMP29]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK7-NEXT: [[TMP37:%.*]] = load i32, i32* [[T_VAR2]], align 4 +// CHECK7-NEXT: store i32 [[TMP37]], i32* [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP38:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK7-NEXT: [[TMP39:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i32 8, i1 false) +// CHECK7-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP10]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP40:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK7-NEXT: [[TMP41:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i32 2 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN8]], [[TMP41]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP28]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[TMP30:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK7-NEXT: [[TMP31:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i32 4, i1 false) +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP40]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[TMP42:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK7-NEXT: [[TMP43:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP42]], i8* align 4 [[TMP43]], i32 4, i1 false) // CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP29]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK7: omp.arraycpy.done12: -// CHECK7-NEXT: [[TMP32:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[TMP5]] to i8* -// CHECK7-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[TMP32]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i32 4, i1 false) -// CHECK7-NEXT: [[TMP35:%.*]] = load i32, i32* [[SVAR7]], align 4 -// CHECK7-NEXT: store i32 [[TMP35]], i32* [[SVAR_ADDR]], align 4 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP41]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK7: omp.arraycpy.done9: +// CHECK7-NEXT: [[TMP44:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP45:%.*]] = bitcast %struct.S* [[TMP17]] to i8* +// CHECK7-NEXT: [[TMP46:%.*]] = bitcast %struct.S* [[TMP44]] to i8* +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP45]], i8* align 4 [[TMP46]], i32 4, i1 false) +// CHECK7-NEXT: [[TMP47:%.*]] = load i32, i32* [[SVAR4]], align 4 +// CHECK7-NEXT: store i32 [[TMP47]], i32* [[SVAR]], align 4 // CHECK7-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK7: .omp.lastprivate.done: -// CHECK7-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN13]], i32 2 +// CHECK7-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK7-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i32 2 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP36]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP48]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK7-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK7: arraydestroy.done14: +// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK7: arraydestroy.done11: // CHECK7-NEXT: ret void // // @@ -2258,32 +2380,32 @@ // CHECK7-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK7-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK7-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK7-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK7-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK7-NEXT: [[VAR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK7-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK7-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x i8*], align 4 // CHECK7-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x i8*], align 4 // CHECK7-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x i8*], align 4 // CHECK7-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK7-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i32 8, i1 false) -// CHECK7-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK7-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK7-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i32 1 -// CHECK7-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) -// CHECK7-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR]], align 4 -// CHECK7-NEXT: store %struct.S.0* [[TMP1]], %struct.S.0** [[TMP]], align 4 +// CHECK7-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK7-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i32 1 +// CHECK7-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK7-NEXT: store %struct.S.1* [[TEST]], %struct.S.1** [[VAR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR]], align 4 +// CHECK7-NEXT: store %struct.S.1* [[TMP1]], %struct.S.1** [[TMP]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_VAR]], align 4 // CHECK7-NEXT: store i32 [[TMP2]], i32* [[T_VAR_CASTED]], align 4 // CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 // CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK7-NEXT: [[TMP6:%.*]] = bitcast i8** [[TMP5]] to [2 x i32]** // CHECK7-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP6]], align 4 @@ -2301,19 +2423,19 @@ // CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 // CHECK7-NEXT: store i8* null, i8** [[TMP14]], align 4 // CHECK7-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to [2 x %struct.S.0]** -// CHECK7-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP16]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to [2 x %struct.S.1]** +// CHECK7-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP16]], align 4 // CHECK7-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.0]** -// CHECK7-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP18]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.1]** +// CHECK7-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP18]], align 4 // CHECK7-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 // CHECK7-NEXT: store i8* null, i8** [[TMP19]], align 4 // CHECK7-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK7-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to %struct.S.0** -// CHECK7-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[TMP21]], align 4 +// CHECK7-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to %struct.S.1** +// CHECK7-NEXT: store %struct.S.1* [[TMP4]], %struct.S.1** [[TMP21]], align 4 // CHECK7-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK7-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.0** -// CHECK7-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[TMP23]], align 4 +// CHECK7-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.1** +// CHECK7-NEXT: store %struct.S.1* [[TMP4]], %struct.S.1** [[TMP23]], align 4 // CHECK7-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 // CHECK7-NEXT: store i8* null, i8** [[TMP24]], align 4 // CHECK7-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 @@ -2323,21 +2445,21 @@ // CHECK7-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 // CHECK7-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK7: omp_offload.failed: -// CHECK7-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l50([2 x i32]* [[VEC]], i32 [[TMP3]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP4]]) #[[ATTR4]] +// CHECK7-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l50([2 x i32]* [[VEC]], i32 [[TMP3]], [2 x %struct.S.1]* [[S_ARR]], %struct.S.1* [[TMP4]]) #[[ATTR4]] // CHECK7-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK7: omp_offload.cont: // CHECK7-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP29]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK7-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP29]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK7-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK7: arraydestroy.done2: -// CHECK7-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK7-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK7-NEXT: [[TMP30:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK7-NEXT: ret i32 [[TMP30]] // @@ -2377,205 +2499,227 @@ // // // CHECK7-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK7-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK7-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK7-NEXT: entry: -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK7-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK7-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK7-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK7-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK7-NEXT: entry: -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK7-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK7-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l50 -// CHECK7-SAME: ([2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: ([2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.1]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 // CHECK7-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK7-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 -// CHECK7-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.1]*, align 4 +// CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK7-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK7-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK7-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 +// CHECK7-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[S_ARR_ADDR]], align 4 +// CHECK7-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[VAR_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP3]], i32* [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32 [[TMP4]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP5]]) +// CHECK7-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[S_ARR_ADDR]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR_ADDR]], align 4 +// CHECK7-NEXT: store %struct.S.1* [[TMP2]], %struct.S.1** [[TMP]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store [2 x %struct.S.1]* [[TMP1]], [2 x %struct.S.1]** [[TMP6]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK7-NEXT: store %struct.S.1* [[TMP8]], %struct.S.1** [[TMP7]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK7-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK7-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK7-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK7-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK7-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK7-NEXT: [[_TMP6:%.*]] = alloca %struct.S.0*, align 4 +// CHECK7-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK7-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK7-NEXT: [[_TMP3:%.*]] = alloca %struct.S.1*, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 +// CHECK7-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP7]], align 4 +// CHECK7-NEXT: store %struct.S.1* [[TMP8]], %struct.S.1** [[TMP]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK7-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK7: arrayctor.loop: -// CHECK7-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK7-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i32 1 -// CHECK7-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK7-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK7-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i32 1 +// CHECK7-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK7-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK7: arrayctor.cont: -// CHECK7-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK7-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK7-NEXT: [[TMP9:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK7-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK7-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK7-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK7: omp.inner.for.cond.cleanup: // CHECK7-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK7-NEXT: store i32 [[TMP13]], i32* [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP11]], i32 [[TMP12]], [2 x i32]* [[VEC3]], i32 [[TMP14]], [2 x %struct.S.0]* [[S_ARR4]], %struct.S.0* [[TMP15]]) +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP17]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP18]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP20]], i32* [[TMP19]], align 4 +// CHECK7-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 4 +// CHECK7-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP23]], align 4 +// CHECK7-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR2]], align 4 +// CHECK7-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4 +// CHECK7-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK7-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP26]], align 4 +// CHECK7-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP28:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP3]], align 4 +// CHECK7-NEXT: store %struct.S.1* [[TMP28]], %struct.S.1** [[TMP27]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK7-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK7-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK7-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK7-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK7: .omp.lastprivate.then: -// CHECK7-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK7-NEXT: store i32 [[TMP22]], i32* [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP23:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK7-NEXT: [[TMP24:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP23]], i8* align 4 [[TMP24]], i32 8, i1 false) -// CHECK7-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP1]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP25:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR4]] to %struct.S.0* -// CHECK7-NEXT: [[TMP26:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN8]], i32 2 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN8]], [[TMP26]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK7-NEXT: [[TMP35:%.*]] = load i32, i32* [[T_VAR2]], align 4 +// CHECK7-NEXT: store i32 [[TMP35]], i32* [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP36:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK7-NEXT: [[TMP37:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 [[TMP37]], i32 8, i1 false) +// CHECK7-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP38:%.*]] = bitcast [2 x %struct.S.1]* [[S_ARR]] to %struct.S.1* +// CHECK7-NEXT: [[TMP39:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN5]], i32 2 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN5]], [[TMP39]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP25]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[TMP27:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK7-NEXT: [[TMP28:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP27]], i8* align 4 [[TMP28]], i32 4, i1 false) -// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP26]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] -// CHECK7: omp.arraycpy.done9: -// CHECK7-NEXT: [[TMP29:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP30:%.*]] = bitcast %struct.S.0* [[TMP3]] to i8* -// CHECK7-NEXT: [[TMP31:%.*]] = bitcast %struct.S.0* [[TMP29]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i32 4, i1 false) +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP38]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[TMP40:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK7-NEXT: [[TMP41:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP40]], i8* align 4 [[TMP41]], i32 4, i1 false) +// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP39]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK7: omp.arraycpy.done6: +// CHECK7-NEXT: [[TMP42:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP43:%.*]] = bitcast %struct.S.1* [[TMP9]] to i8* +// CHECK7-NEXT: [[TMP44:%.*]] = bitcast %struct.S.1* [[TMP42]] to i8* +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP43]], i8* align 4 [[TMP44]], i32 4, i1 false) // CHECK7-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK7: .omp.lastprivate.done: -// CHECK7-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN10]], i32 2 +// CHECK7-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK7-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN7]], i32 2 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP32]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK7-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK7: arraydestroy.done11: +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP45]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK7-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK7: arraydestroy.done8: // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK7-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK7-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2583,183 +2727,191 @@ // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK7-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK7-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK7-NEXT: [[_TMP6:%.*]] = alloca %struct.S.0*, align 4 +// CHECK7-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK7-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK7-NEXT: [[_TMP3:%.*]] = alloca %struct.S.1*, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 +// CHECK7-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP12:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP11]], align 4 +// CHECK7-NEXT: store %struct.S.1* [[TMP12]], %struct.S.1** [[TMP]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK7-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK7: arrayctor.loop: -// CHECK7-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK7-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i32 1 -// CHECK7-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK7-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK7-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i32 1 +// CHECK7-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK7-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK7: arrayctor.cont: -// CHECK7-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK7-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK7-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK7-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK7-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK7: omp.inner.for.cond.cleanup: // CHECK7-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK7-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC3]], i32 0, i32 [[TMP15]] -// CHECK7-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 [[TMP17]] -// CHECK7-NEXT: [[TMP18:%.*]] = bitcast %struct.S.0* [[ARRAYIDX8]] to i8* -// CHECK7-NEXT: [[TMP19:%.*]] = bitcast %struct.S.0* [[TMP16]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false) +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR2]], align 4 +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP25]] +// CHECK7-NEXT: store i32 [[TMP24]], i32* [[ARRAYIDX]], align 4 +// CHECK7-NEXT: [[TMP26:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 [[TMP27]] +// CHECK7-NEXT: [[TMP28:%.*]] = bitcast %struct.S.1* [[ARRAYIDX5]] to i8* +// CHECK7-NEXT: [[TMP29:%.*]] = bitcast %struct.S.1* [[TMP26]] to i8* +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 4, i1 false) // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK7-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK7-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK7-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK7-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK7-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK7-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK7-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK7: .omp.lastprivate.then: -// CHECK7-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK7-NEXT: store i32 [[TMP25]], i32* [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK7-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 8, i1 false) -// CHECK7-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP1]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP28:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR4]] to %struct.S.0* -// CHECK7-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN10]], i32 2 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN10]], [[TMP29]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK7-NEXT: [[TMP35:%.*]] = load i32, i32* [[T_VAR2]], align 4 +// CHECK7-NEXT: store i32 [[TMP35]], i32* [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP36:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK7-NEXT: [[TMP37:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 [[TMP37]], i32 8, i1 false) +// CHECK7-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP10]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP38:%.*]] = bitcast [2 x %struct.S.1]* [[S_ARR]] to %struct.S.1* +// CHECK7-NEXT: [[TMP39:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN7]], i32 2 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN7]], [[TMP39]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP28]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[TMP30:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK7-NEXT: [[TMP31:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i32 4, i1 false) -// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP29]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK7: omp.arraycpy.done11: -// CHECK7-NEXT: [[TMP32:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP33:%.*]] = bitcast %struct.S.0* [[TMP5]] to i8* -// CHECK7-NEXT: [[TMP34:%.*]] = bitcast %struct.S.0* [[TMP32]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i32 4, i1 false) +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP38]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[TMP40:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK7-NEXT: [[TMP41:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP40]], i8* align 4 [[TMP41]], i32 4, i1 false) +// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP39]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK7: omp.arraycpy.done8: +// CHECK7-NEXT: [[TMP42:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP43:%.*]] = bitcast %struct.S.1* [[TMP15]] to i8* +// CHECK7-NEXT: [[TMP44:%.*]] = bitcast %struct.S.1* [[TMP42]] to i8* +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP43]], i8* align 4 [[TMP44]], i32 4, i1 false) // CHECK7-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK7: .omp.lastprivate.done: -// CHECK7-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN12]], i32 2 +// CHECK7-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK7-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN9]], i32 2 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP35]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK7-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK7: arraydestroy.done13: +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP45]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK7-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK7: arraydestroy.done10: // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK7-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK7-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK7-NEXT: entry: -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK7-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK7-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK7-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK7-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK7-NEXT: entry: -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK7-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK7-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK7-NEXT: store i32 0, i32* [[F]], align 4 // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK7-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK7-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK7-NEXT: entry: -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK7-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK7-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK7-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK7-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK7-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK7-NEXT: entry: -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK7-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK7-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK7-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp @@ -314,29 +314,36 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 @@ -352,55 +359,63 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i64 2 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP20]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -410,12 +425,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -430,14 +446,20 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 @@ -453,70 +475,70 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM3]] -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* +// CHECK1-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD5]], i32* [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP20]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP25]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] @@ -529,23 +551,23 @@ // CHECK1-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) -// CHECK1-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 8 -// CHECK1-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) +// CHECK1-NEXT: store %struct.S.1* [[TEST]], %struct.S.1** [[VAR]], align 8 +// CHECK1-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 8 // CHECK1-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 2) // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0) // CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 @@ -555,292 +577,314 @@ // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done2: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK1-NEXT: ret i32 [[TMP4]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) +// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80 // CHECK1-SAME: () #[[ATTR4]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[_TMP2:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: -// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN4]], i64 2 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN4]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP20]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done5: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[_TMP3:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: -// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[ARRAYIDX6]] to i8* -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i64 4, i1 false) +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: [[TMP19:%.*]] = bitcast %struct.S.1* [[ARRAYIDX6]] to i8* +// CHECK1-NEXT: [[TMP20:%.*]] = bitcast %struct.S.1* [[TMP17]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN8]], i64 2 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN8]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP19]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP24]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done9: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load volatile i32, i32* @g, align 4 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load volatile i32, i32* @g, align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -849,11 +893,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -1007,29 +1051,36 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124 // CHECK3-SAME: () #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 @@ -1045,53 +1096,61 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i32 2 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -1101,12 +1160,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1121,14 +1181,20 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 @@ -1142,68 +1208,68 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP12]] -// CHECK3-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[ARRAYIDX2]] to i8* -// CHECK3-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 4, i1 false) -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP17]] +// CHECK3-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX2]] to i8* +// CHECK3-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] // CHECK3-NEXT: store i32 [[ADD3]], i32* [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN5]], i32 2 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN5]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP20]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP25]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] @@ -1216,23 +1282,23 @@ // CHECK3-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK3-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i32 8, i1 false) -// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i32 1 -// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) -// CHECK3-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 4 -// CHECK3-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 4 +// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i32 1 +// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK3-NEXT: store %struct.S.1* [[TEST]], %struct.S.1** [[VAR]], align 4 +// CHECK3-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 4 // CHECK3-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 2) // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0) // CHECK3-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 @@ -1242,286 +1308,308 @@ // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK3: arraydestroy.done2: -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK3-NEXT: ret i32 [[TMP4]] // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK3-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80 // CHECK3-SAME: () #[[ATTR4]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[_TMP2:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 4 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK3: arrayctor.loop: -// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i32 1 -// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i32 1 +// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: -// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK3-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN4]], i32 2 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN4]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK3: arraydestroy.done5: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[_TMP2:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 4 +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK3: arrayctor.loop: -// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i32 1 -// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i32 1 +// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: -// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK3-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[ARRAYIDX4]] to i8* -// CHECK3-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 [[TMP18]] +// CHECK3-NEXT: [[TMP19:%.*]] = bitcast %struct.S.1* [[ARRAYIDX4]] to i8* +// CHECK3-NEXT: [[TMP20:%.*]] = bitcast %struct.S.1* [[TMP17]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i32 4, i1 false) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN6]], i32 2 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP19]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP24]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK3: arraydestroy.done7: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load volatile i32, i32* @g, align 4 // CHECK3-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load volatile i32, i32* @g, align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1530,11 +1618,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -1680,15 +1768,17 @@ // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104 // CHECK5-SAME: () #[[ATTR5:[0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 @@ -1696,68 +1786,82 @@ // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[G:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[G1:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32* undef, i32** [[_TMP1]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK5-NEXT: store i32* [[G1]], i32** [[_TMP2]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR5]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 @@ -1770,74 +1874,80 @@ // CHECK5-NEXT: [[_TMP3:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK5-NEXT: store i32* undef, i32** [[_TMP1]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK5-NEXT: store i32* [[G1]], i32** [[_TMP3]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK5-NEXT: store i32 1, i32* [[G]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK5-NEXT: store volatile i32 1, i32* [[TMP10]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK5-NEXT: store volatile i32 1, i32* [[TMP15]], align 4 // CHECK5-NEXT: store i32 2, i32* [[SIVAR]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK5-NEXT: store i32* [[G]], i32** [[TMP11]], align 8 -// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK5-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8 -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK5-NEXT: store i32* [[SIVAR]], i32** [[TMP14]], align 8 -// CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK5-NEXT: store i32* [[G]], i32** [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK5-NEXT: store i32* [[TMP18]], i32** [[TMP17]], align 8 +// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 +// CHECK5-NEXT: store i32* [[SIVAR]], i32** [[TMP19]], align 8 +// CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK5-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // // @@ -1860,29 +1970,36 @@ // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124 // CHECK13-SAME: () #[[ATTR0:[0-9]+]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK13-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 @@ -1898,55 +2015,63 @@ // CHECK13-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK13: arrayctor.cont: // CHECK13-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]]) +// CHECK13-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) // CHECK13-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]] // CHECK13-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i64 2 +// CHECK13-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP20]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -1966,12 +2091,13 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1986,14 +2112,20 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 @@ -2009,70 +2141,70 @@ // CHECK13-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK13: arrayctor.cont: // CHECK13-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP17]] to i64 // CHECK13-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM3]] -// CHECK13-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* -// CHECK13-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 4, i1 false) -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR]], align 4 -// CHECK13-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] +// CHECK13-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* +// CHECK13-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false) +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK13-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] // CHECK13-NEXT: store i32 [[ADD5]], i32* [[SIVAR]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK13-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) +// CHECK13-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) // CHECK13-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] // CHECK13-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i64 2 +// CHECK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP20]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP25]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] @@ -2094,237 +2226,259 @@ // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80 // CHECK13-SAME: () #[[ATTR0]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 +// CHECK13-NEXT: [[_TMP1:%.*]] = alloca %struct.S.2*, align 8 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK13-NEXT: [[_TMP2:%.*]] = alloca %struct.S.0*, align 8 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK13-NEXT: [[_TMP2:%.*]] = alloca %struct.S.2*, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 8 +// CHECK13-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: store %struct.S.2* undef, %struct.S.2** [[_TMP1]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN]], i64 2 // CHECK13-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK13: arrayctor.loop: -// CHECK13-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK13-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] -// CHECK13-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK13-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK13-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.2* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK13-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] +// CHECK13-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYCTOR_CUR]], i64 1 +// CHECK13-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK13-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK13: arrayctor.cont: -// CHECK13-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK13-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK13-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK13-NEXT: store %struct.S.2* [[VAR]], %struct.S.2** [[_TMP2]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]]) -// CHECK13-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN4]], i64 2 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) +// CHECK13-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN4]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK13-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP20]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK13-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK13: arraydestroy.done5: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK13-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK13-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK13-NEXT: entry: -// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK13-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK13-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK13-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK13-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK13-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK13-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 +// CHECK13-NEXT: [[_TMP1:%.*]] = alloca %struct.S.2*, align 8 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK13-NEXT: [[_TMP3:%.*]] = alloca %struct.S.0*, align 8 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK13-NEXT: [[_TMP3:%.*]] = alloca %struct.S.2*, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 8 +// CHECK13-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store %struct.S.2* undef, %struct.S.2** [[_TMP1]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN]], i64 2 // CHECK13-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK13: arrayctor.loop: -// CHECK13-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK13-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] -// CHECK13-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK13-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK13-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.2* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK13-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] +// CHECK13-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYCTOR_CUR]], i64 1 +// CHECK13-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK13-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK13: arrayctor.cont: -// CHECK13-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK13-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP3]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK13-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK13-NEXT: store %struct.S.2* [[VAR]], %struct.S.2** [[_TMP3]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 8 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK13-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] -// CHECK13-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[ARRAYIDX6]] to i8* -// CHECK13-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i64 4, i1 false) +// CHECK13-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load %struct.S.2*, %struct.S.2** [[_TMP3]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK13-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK13-NEXT: [[TMP19:%.*]] = bitcast %struct.S.2* [[ARRAYIDX6]] to i8* +// CHECK13-NEXT: [[TMP20:%.*]] = bitcast %struct.S.2* [[TMP17]] to i8* +// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i64 4, i1 false) // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) -// CHECK13-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN8]], i64 2 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK13-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN8]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP19]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK13-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP24]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK13-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] // CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK13: arraydestroy.done9: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK13-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK13-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK13-NEXT: entry: -// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK13-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK13-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK13-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR5]] +// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK13-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK13-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK13-NEXT: call void @_ZN1SIiED2Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR5]] // CHECK13-NEXT: ret void // // @@ -2351,52 +2505,59 @@ // // // CHECK13-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK13-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK13-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK13-NEXT: entry: -// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK13-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK13-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK13-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK13-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK13-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK13-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], %struct.S.2* [[THIS1]], i32 0, i32 0 // CHECK13-NEXT: [[TMP0:%.*]] = load volatile i32, i32* @g, align 4 // CHECK13-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK13-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK13-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK13-NEXT: entry: -// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK13-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK13-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK13-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK13-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 // CHECK13-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124 // CHECK15-SAME: () #[[ATTR0:[0-9]+]] { // CHECK15-NEXT: entry: -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK15-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 @@ -2412,53 +2573,61 @@ // CHECK15-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK15: arrayctor.cont: // CHECK15-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK15-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]) +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], i32* [[TMP10]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK15-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) // CHECK15-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]] // CHECK15-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i32 2 +// CHECK15-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -2478,12 +2647,13 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2498,14 +2668,20 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 @@ -2519,68 +2695,68 @@ // CHECK15-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK15: arrayctor.cont: // CHECK15-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK15-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP11]] -// CHECK15-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP12]] -// CHECK15-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[ARRAYIDX2]] to i8* -// CHECK15-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 4, i1 false) -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR]], align 4 -// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP16]] +// CHECK15-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP17]] +// CHECK15-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX2]] to i8* +// CHECK15-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false) +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] // CHECK15-NEXT: store i32 [[ADD3]], i32* [[SIVAR]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK15-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) +// CHECK15-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) // CHECK15-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] // CHECK15-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN5]], i32 2 +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN5]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP20]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP25]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] @@ -2602,231 +2778,253 @@ // CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80 // CHECK15-SAME: () #[[ATTR0]] { // CHECK15-NEXT: entry: -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 +// CHECK15-NEXT: [[_TMP1:%.*]] = alloca %struct.S.2*, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK15-NEXT: [[_TMP2:%.*]] = alloca %struct.S.0*, align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK15-NEXT: [[_TMP2:%.*]] = alloca %struct.S.2*, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 4 +// CHECK15-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: store %struct.S.2* undef, %struct.S.2** [[_TMP1]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN]], i32 2 // CHECK15-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK15: arrayctor.loop: -// CHECK15-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK15-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] -// CHECK15-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i32 1 -// CHECK15-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK15-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.2* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK15-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] +// CHECK15-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYCTOR_CUR]], i32 1 +// CHECK15-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK15-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK15: arrayctor.cont: -// CHECK15-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK15-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK15-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK15-NEXT: store %struct.S.2* [[VAR]], %struct.S.2** [[_TMP2]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]) +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], i32* [[TMP10]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) -// CHECK15-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN4]], i32 2 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK15-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN4]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK15-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK15-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK15: arraydestroy.done5: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK15-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK15-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK15-NEXT: entry: -// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK15-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK15-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK15-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 4 +// CHECK15-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 4 +// CHECK15-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 4 +// CHECK15-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 +// CHECK15-NEXT: [[_TMP1:%.*]] = alloca %struct.S.2*, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK15-NEXT: [[_TMP2:%.*]] = alloca %struct.S.0*, align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK15-NEXT: [[_TMP2:%.*]] = alloca %struct.S.2*, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 4 +// CHECK15-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store %struct.S.2* undef, %struct.S.2** [[_TMP1]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN]], i32 2 // CHECK15-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK15: arrayctor.loop: -// CHECK15-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK15-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] -// CHECK15-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i32 1 -// CHECK15-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK15-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.2* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK15-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] +// CHECK15-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYCTOR_CUR]], i32 1 +// CHECK15-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK15-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK15: arrayctor.cont: -// CHECK15-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK15-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK15-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK15-NEXT: store %struct.S.2* [[VAR]], %struct.S.2** [[_TMP2]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK15-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP11]] -// CHECK15-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP13]] -// CHECK15-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[ARRAYIDX4]] to i8* -// CHECK15-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i32 4, i1 false) +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP16]] +// CHECK15-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load %struct.S.2*, %struct.S.2** [[_TMP2]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 [[TMP18]] +// CHECK15-NEXT: [[TMP19:%.*]] = bitcast %struct.S.2* [[ARRAYIDX4]] to i8* +// CHECK15-NEXT: [[TMP20:%.*]] = bitcast %struct.S.2* [[TMP17]] to i8* +// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i32 4, i1 false) // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK15-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) -// CHECK15-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN6]], i32 2 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK15-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN6]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP19]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK15-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP24]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK15-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] // CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK15: arraydestroy.done7: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK15-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK15-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK15-NEXT: entry: -// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK15-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK15-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK15-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR5]] +// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 4 +// CHECK15-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 4 +// CHECK15-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 4 +// CHECK15-NEXT: call void @_ZN1SIiED2Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR5]] // CHECK15-NEXT: ret void // // @@ -2853,38 +3051,40 @@ // // // CHECK15-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK15-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK15-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK15-NEXT: entry: -// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK15-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK15-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK15-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 4 +// CHECK15-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 4 +// CHECK15-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 4 +// CHECK15-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], %struct.S.2* [[THIS1]], i32 0, i32 0 // CHECK15-NEXT: [[TMP0:%.*]] = load volatile i32, i32* @g, align 4 // CHECK15-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK15-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK15-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK15-NEXT: entry: -// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK15-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK15-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 4 +// CHECK15-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 4 +// CHECK15-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 4 // CHECK15-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104 // CHECK17-SAME: () #[[ATTR0:[0-9]+]] { // CHECK17-NEXT: entry: -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 @@ -2892,68 +3092,82 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[G:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[G1:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: store i32* undef, i32** [[_TMP1]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK17-NEXT: store i32* [[G1]], i32** [[_TMP2]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK17-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK17-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK17-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 @@ -2969,70 +3183,76 @@ // CHECK17-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK17-NEXT: store i32* undef, i32** [[_TMP1]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK17-NEXT: store i32* [[G1]], i32** [[_TMP3]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK17-NEXT: store i32 1, i32* [[G]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK17-NEXT: store volatile i32 1, i32* [[TMP10]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK17-NEXT: store volatile i32 1, i32* [[TMP15]], align 4 // CHECK17-NEXT: store i32 2, i32* [[SIVAR]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 0 -// CHECK17-NEXT: store i32* [[G]], i32** [[TMP11]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK17-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8 -// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 2 -// CHECK17-NEXT: store i32* [[SIVAR]], i32** [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 0 +// CHECK17-NEXT: store i32* [[G]], i32** [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK17-NEXT: store i32* [[TMP18]], i32** [[TMP17]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 2 +// CHECK17-NEXT: store i32* [[SIVAR]], i32** [[TMP19]], align 8 // CHECK17-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR3:[0-9]+]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK17-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK17-NEXT: ret void // diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp @@ -78,78 +78,94 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l36 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 4) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 4) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -159,135 +175,157 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l38 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 3) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 3) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -297,57 +335,63 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -369,78 +413,94 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l30 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 2) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 2) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -450,57 +510,63 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp @@ -125,195 +125,219 @@ // CHECK1-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[TMP0]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[SIVAR1]], align 4 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i32* [[SIVAR1]]) +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[SIVAR]], i32** [[TMP18]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i32* [[SIVAR1]] to i8* -// CHECK1-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP22:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP23]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP27]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SIVAR2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[SIVAR2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 0, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[SIVAR2]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[SIVAR2]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i32* [[SIVAR2]] to i8* -// CHECK1-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, i8* [[TMP22]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR2]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD6]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[SIVAR2]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = atomicrmw add i32* [[TMP6]], i32 [[TMP26]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -404,195 +428,219 @@ // CHECK1-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[TMP0]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[T_VAR1]], align 4 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i32* [[T_VAR1]]) +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[TMP18]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i32* [[T_VAR1]] to i8* -// CHECK1-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP22:%.*]] = bitcast i32* [[T_VAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP23]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP27]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[T_VAR2]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[T_VAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i32* [[T_VAR2]] to i8* -// CHECK1-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.5, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[T_VAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, i8* [[TMP22]], void (i8*, i8*)* @.omp.reduction.reduction_func.5, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD6]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = atomicrmw add i32* [[TMP6]], i32 [[TMP26]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -688,191 +736,215 @@ // CHECK3-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[TMP0]], i32** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[SIVAR1]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], i32* [[SIVAR1]]) +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[TMP16]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = bitcast i32* [[SIVAR1]] to i8* -// CHECK3-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, i8* [[TMP14]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP20:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK3-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i32 4, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP25]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: store i32 0, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[SIVAR1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = bitcast i32* [[SIVAR1]] to i8* -// CHECK3-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP21:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK3-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], i32 1, i32 4, i8* [[TMP22]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[TMP6]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = atomicrmw add i32* [[TMP6]], i32 [[TMP26]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -963,191 +1035,215 @@ // CHECK3-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[TMP0]], i32** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[T_VAR1]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], i32* [[T_VAR1]]) +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[T_VAR]], i32** [[TMP16]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = bitcast i32* [[T_VAR1]] to i8* -// CHECK3-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 4, i8* [[TMP14]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP20:%.*]] = bitcast i32* [[T_VAR]] to i8* +// CHECK3-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP25]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: store i32 0, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[T_VAR1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[T_VAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = bitcast i32* [[T_VAR1]] to i8* -// CHECK3-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.5, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP21:%.*]] = bitcast i32* [[T_VAR]] to i8* +// CHECK3-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], i32 1, i32 4, i8* [[TMP22]], void (i8*, i8*)* @.omp.reduction.reduction_func.5, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[TMP6]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = atomicrmw add i32* [[TMP6]], i32 [[TMP26]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -1222,199 +1318,223 @@ // CHECK5-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK5-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i32* [[TMP0]], i32** [[TMP1]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: store i32 0, i32* [[SIVAR1]], align 4 +// CHECK5-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK5-NEXT: store i32 0, i32* [[SIVAR]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i32* [[SIVAR1]]) +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store i32* [[SIVAR]], i32** [[TMP18]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK5-NEXT: [[TMP15:%.*]] = bitcast i32* [[SIVAR1]] to i8* -// CHECK5-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 8 -// CHECK5-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK5-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK5-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP22:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK5-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 8 +// CHECK5-NEXT: [[TMP23:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK5-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP23]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK5-NEXT: store i32 [[ADD3]], i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK5-NEXT: store i32 [[ADD2]], i32* [[TMP2]], align 4 +// CHECK5-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK5-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP27]] monotonic, align 4 // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[SIVAR2:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: store i32 0, i32* [[SIVAR2]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK5-NEXT: store i32 0, i32* [[SIVAR]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[SIVAR2]], align 4 -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK5-NEXT: store i32 [[ADD4]], i32* [[SIVAR2]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK5-NEXT: store i32* [[SIVAR2]], i32** [[TMP13]], align 8 -// CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK5-NEXT: store i32 [[ADD3]], i32* [[SIVAR]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK5-NEXT: store i32* [[SIVAR]], i32** [[TMP19]], align 8 +// CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK5-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK5-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK5-NEXT: [[TMP16:%.*]] = bitcast i32* [[SIVAR2]] to i8* -// CHECK5-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK5-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP17]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK5-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP22:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK5-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 8 +// CHECK5-NEXT: [[TMP23:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK5-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, i8* [[TMP23]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[SIVAR2]], align 4 -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK5-NEXT: store i32 [[ADD6]], i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK5-NEXT: store i32 [[ADD5]], i32* [[TMP6]], align 4 +// CHECK5-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR2]], align 4 -// CHECK5-NEXT: [[TMP22:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP21]] monotonic, align 4 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = atomicrmw add i32* [[TMP6]], i32 [[TMP27]] monotonic, align 4 // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: // CHECK5-NEXT: ret void diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp @@ -52,265 +52,286 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 // CHECK1-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8**)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[TMP0]], i8** [[TMP1]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[TMP0]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: store i8** [[TMP3]], i8*** [[TMP2]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i8** noundef [[ARGV:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[_TMP12:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[_TMP11:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x i8*], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP27:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP26:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP1]], i64 0 -// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8*, i8** [[TMP5]], i64 9 -// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint i8* [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint i8* [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = add nuw i64 [[TMP10]], 1 -// CHECK1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = call i8* @llvm.stacksave() -// CHECK1-NEXT: store i8* [[TMP13]], i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP11]], align 16 -// CHECK1-NEXT: store i64 [[TMP11]], i64* [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[VLA]], [[TMP14]] +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: store i32 0, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP4]], i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8*, i8** [[TMP8]], i64 9 +// CHECK1-NEXT: [[TMP9:%.*]] = load i8*, i8** [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP9]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint i8* [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint i8* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP14:%.*]] = add nuw i64 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP16:%.*]] = call i8* @llvm.stacksave() +// CHECK1-NEXT: store i8* [[TMP16]], i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP14]], align 16 +// CHECK1-NEXT: store i64 [[TMP14]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[VLA]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP15:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint i8* [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint i8* [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP20]] -// CHECK1-NEXT: store i8** [[_TMP5]], i8*** [[TMP]], align 8 -// CHECK1-NEXT: store i8* [[TMP21]], i8** [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint i8* [[TMP19]] to i64 +// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint i8* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP22:%.*]] = sub i64 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP23:%.*]] = sdiv exact i64 [[TMP22]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP23]] +// CHECK1-NEXT: store i8** [[_TMP4]], i8*** [[TMP]], align 8 +// CHECK1-NEXT: store i8* [[TMP24]], i8** [[_TMP4]], align 8 // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i32* [[ARGC1]] to i8* -// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP25:%.*]] = bitcast i32* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, i64* [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init. to i8*), i8** [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store i8* null, i8** [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb. to i8*), i8** [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: [[TMP31:%.*]] = bitcast i32* [[TMP30]] to i8* -// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP31]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP33:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8*, i8** [[TMP33]], i64 0 -// CHECK1-NEXT: [[TMP34:%.*]] = load i8*, i8** [[ARRAYIDX7]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, i8* [[TMP34]], i64 0 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = sext i32 [[TMP35]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP36]] -// CHECK1-NEXT: [[TMP37:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i8*, i8** [[TMP37]], i64 9 -// CHECK1-NEXT: [[TMP38:%.*]] = load i8*, i8** [[ARRAYIDX10]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, i8* [[TMP38]], i64 [[LB_ADD_LEN9]] -// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 -// CHECK1-NEXT: store i8* [[ARRAYIDX8]], i8** [[TMP39]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint i8* [[ARRAYIDX11]] to i64 -// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint i8* [[ARRAYIDX8]] to i64 -// CHECK1-NEXT: [[TMP42:%.*]] = sub i64 [[TMP40]], [[TMP41]] -// CHECK1-NEXT: [[TMP43:%.*]] = sdiv exact i64 [[TMP42]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP44:%.*]] = add nuw i64 [[TMP43]], 1 -// CHECK1-NEXT: [[TMP45:%.*]] = mul nuw i64 [[TMP44]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP45]], i64* [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init..1 to i8*), i8** [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 -// CHECK1-NEXT: store i8* null, i8** [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb..2 to i8*), i8** [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 -// CHECK1-NEXT: store i32 1, i32* [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[TMP51]], align 4 -// CHECK1-NEXT: [[TMP53:%.*]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]] to i8* -// CHECK1-NEXT: [[TMP54:%.*]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @[[GLOB1]], i32 [[TMP52]], i32 1, i32 2, i8* [[TMP53]]) -// CHECK1-NEXT: store i8* [[TMP54]], i8** [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i32* [[ARGC]] to i8* +// CHECK1-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP28:%.*]] = bitcast i32* [[TMP2]] to i8* +// CHECK1-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, i64* [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init. to i8*), i8** [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store i8* null, i8** [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb. to i8*), i8** [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to i8* +// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP34]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP36:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP36]], i64 0 +// CHECK1-NEXT: [[TMP37:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP37]], i64 0 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = sext i32 [[TMP38]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN8:%.*]] = add nsw i64 -1, [[TMP39]] +// CHECK1-NEXT: [[TMP40:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8*, i8** [[TMP40]], i64 9 +// CHECK1-NEXT: [[TMP41:%.*]] = load i8*, i8** [[ARRAYIDX9]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i8, i8* [[TMP41]], i64 [[LB_ADD_LEN8]] +// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 1 +// CHECK1-NEXT: store i8* [[ARRAYIDX7]], i8** [[TMP42]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = ptrtoint i8* [[ARRAYIDX10]] to i64 +// CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint i8* [[ARRAYIDX7]] to i64 +// CHECK1-NEXT: [[TMP45:%.*]] = sub i64 [[TMP43]], [[TMP44]] +// CHECK1-NEXT: [[TMP46:%.*]] = sdiv exact i64 [[TMP45]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP47:%.*]] = add nuw i64 [[TMP46]], 1 +// CHECK1-NEXT: [[TMP48:%.*]] = mul nuw i64 [[TMP47]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP48]], i64* [[TMP49]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 3 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init..1 to i8*), i8** [[TMP50]], align 8 +// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 4 +// CHECK1-NEXT: store i8* null, i8** [[TMP51]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 5 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb..2 to i8*), i8** [[TMP52]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 6 +// CHECK1-NEXT: store i32 1, i32* [[TMP53]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, i32* [[TMP54]], align 4 +// CHECK1-NEXT: [[TMP56:%.*]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]] to i8* +// CHECK1-NEXT: [[TMP57:%.*]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @[[GLOB1]], i32 [[TMP55]], i32 1, i32 2, i8* [[TMP56]]) +// CHECK1-NEXT: store i8* [[TMP57]], i8** [[DOTTASK_RED_]], align 8 // CHECK1-NEXT: store i64 0, i64* [[DOTOMP_COMB_LB]], align 8 // CHECK1-NEXT: store i64 9, i64* [[DOTOMP_COMB_UB]], align 8 // CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP55:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP56]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP57:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP57]], 9 +// CHECK1-NEXT: [[TMP58:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = load i32, i32* [[TMP58]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP59]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP60:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP60]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP58:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP58]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP61]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP59]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP62]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP60:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 [[TMP60]], [[TMP61]] -// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP63:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP63]], [[TMP64]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP62:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = load i8**, i8*** [[TMP]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i8**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP62]], i64 [[TMP63]], i32* [[ARGC1]], i8** [[TMP64]]) +// CHECK1-NEXT: [[TMP65:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP65]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP66:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP66]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP68:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP68]], i64* [[TMP67]], align 8 +// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP70:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP70]], i64* [[TMP69]], align 8 +// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[ARGC]], i32** [[TMP71]], align 8 +// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP73:%.*]] = load i8**, i8*** [[TMP]], align 8 +// CHECK1-NEXT: store i8** [[TMP73]], i8*** [[TMP72]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP65:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP66:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP65]], [[TMP66]] +// CHECK1-NEXT: [[TMP74:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP75:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP74]], [[TMP75]] // CHECK1-NEXT: store i64 [[ADD]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP67:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP68:%.*]] = load i32, i32* [[TMP67]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP68]]) -// CHECK1-NEXT: [[TMP69:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP70:%.*]] = load i32, i32* [[TMP69]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP70]], i32 1) -// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP72:%.*]] = bitcast i32* [[ARGC1]] to i8* -// CHECK1-NEXT: store i8* [[TMP72]], i8** [[TMP71]], align 8 -// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP73]], align 8 -// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP75:%.*]] = inttoptr i64 [[TMP11]] to i8* -// CHECK1-NEXT: store i8* [[TMP75]], i8** [[TMP74]], align 8 // CHECK1-NEXT: [[TMP76:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP77:%.*]] = load i32, i32* [[TMP76]], align 4 -// CHECK1-NEXT: [[TMP78:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP79:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP77]], i32 2, i64 24, i8* [[TMP78]], void (i8*, i8*)* @.omp.reduction.reduction_func.9, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP79]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP77]]) +// CHECK1-NEXT: [[TMP78:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP79:%.*]] = load i32, i32* [[TMP78]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP79]], i32 1) +// CHECK1-NEXT: [[TMP80:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP81:%.*]] = bitcast i32* [[ARGC]] to i8* +// CHECK1-NEXT: store i8* [[TMP81]], i8** [[TMP80]], align 8 +// CHECK1-NEXT: [[TMP82:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP82]], align 8 +// CHECK1-NEXT: [[TMP83:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP84:%.*]] = inttoptr i64 [[TMP14]] to i8* +// CHECK1-NEXT: store i8* [[TMP84]], i8** [[TMP83]], align 8 +// CHECK1-NEXT: [[TMP85:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP86:%.*]] = load i32, i32* [[TMP85]], align 4 +// CHECK1-NEXT: [[TMP87:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP88:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP86]], i32 2, i64 24, i8* [[TMP87]], void (i8*, i8*)* @.omp.reduction.reduction_func.9, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP88]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP80:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP81:%.*]] = load i32, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP80]], [[TMP81]] -// CHECK1-NEXT: store i32 [[ADD14]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP82:%.*]] = getelementptr i8, i8* [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[ARRAYIDX2]], [[TMP82]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP89:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP90:%.*]] = load i32, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP89]], [[TMP90]] +// CHECK1-NEXT: store i32 [[ADD13]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr i8, i8* [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[ARRAYIDX1]], [[TMP91]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE20:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi i8* [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP83:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP83]] to i32 -// CHECK1-NEXT: [[TMP84:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV16:%.*]] = sext i8 [[TMP84]] to i32 -// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV]], [[CONV16]] -// CHECK1-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 -// CHECK1-NEXT: store i8 [[CONV18]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi i8* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP92:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST14]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP92]] to i32 +// CHECK1-NEXT: [[TMP93:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV15:%.*]] = sext i8 [[TMP93]] to i32 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV]], [[CONV15]] +// CHECK1-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 +// CHECK1-NEXT: store i8 [[CONV17]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST14]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT18]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP82]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done21: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB4]], i32 [[TMP77]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE19:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT18]], [[TMP91]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done20: +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB4]], i32 [[TMP86]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP85:%.*]] = load i32, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP86:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP85]] monotonic, align 4 -// CHECK1-NEXT: [[TMP87:%.*]] = getelementptr i8, i8* [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY22:%.*]] = icmp eq i8* [[ARRAYIDX2]], [[TMP87]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY22]], label [[OMP_ARRAYCPY_DONE35:%.*]], label [[OMP_ARRAYCPY_BODY23:%.*]] -// CHECK1: omp.arraycpy.body23: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST24:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT33:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST25:%.*]] = phi i8* [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP88:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 -// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP88]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST25]] monotonic, align 1 +// CHECK1-NEXT: [[TMP94:%.*]] = load i32, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP95:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP94]] monotonic, align 4 +// CHECK1-NEXT: [[TMP96:%.*]] = getelementptr i8, i8* [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY21:%.*]] = icmp eq i8* [[ARRAYIDX1]], [[TMP96]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY21]], label [[OMP_ARRAYCPY_DONE34:%.*]], label [[OMP_ARRAYCPY_BODY22:%.*]] +// CHECK1: omp.arraycpy.body22: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST23:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT32:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST24:%.*]] = phi i8* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT31:%.*]], [[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[TMP97:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST23]], align 1 +// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP97]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST24]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP89:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY23]] ], [ [[TMP94:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP89]], i8* [[_TMP27]], align 1 -// CHECK1-NEXT: [[TMP90:%.*]] = load i8, i8* [[_TMP27]], align 1 -// CHECK1-NEXT: [[CONV28:%.*]] = sext i8 [[TMP90]] to i32 -// CHECK1-NEXT: [[TMP91:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 -// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP91]] to i32 -// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV28]], [[CONV29]] -// CHECK1-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 -// CHECK1-NEXT: store i8 [[CONV31]], i8* [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP92:%.*]] = load i8, i8* [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP93:%.*]] = cmpxchg i8* [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i8 [[TMP89]], i8 [[TMP92]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP94]] = extractvalue { i8, i1 } [[TMP93]], 0 -// CHECK1-NEXT: [[TMP95:%.*]] = extractvalue { i8, i1 } [[TMP93]], 1 -// CHECK1-NEXT: br i1 [[TMP95]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP98:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY22]] ], [ [[TMP103:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP98]], i8* [[_TMP26]], align 1 +// CHECK1-NEXT: [[TMP99:%.*]] = load i8, i8* [[_TMP26]], align 1 +// CHECK1-NEXT: [[CONV27:%.*]] = sext i8 [[TMP99]] to i32 +// CHECK1-NEXT: [[TMP100:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST23]], align 1 +// CHECK1-NEXT: [[CONV28:%.*]] = sext i8 [[TMP100]] to i32 +// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[CONV27]], [[CONV28]] +// CHECK1-NEXT: [[CONV30:%.*]] = trunc i32 [[ADD29]] to i8 +// CHECK1-NEXT: store i8 [[CONV30]], i8* [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP101:%.*]] = load i8, i8* [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP102:%.*]] = cmpxchg i8* [[OMP_ARRAYCPY_DESTELEMENTPAST24]], i8 [[TMP98]], i8 [[TMP101]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP103]] = extractvalue { i8, i1 } [[TMP102]], 0 +// CHECK1-NEXT: [[TMP104:%.*]] = extractvalue { i8, i1 } [[TMP102]], 1 +// CHECK1-NEXT: br i1 [[TMP104]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT33]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST24]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE34:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP87]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_BODY23]] -// CHECK1: omp.arraycpy.done35: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT31]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST24]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT32]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST23]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE33:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT31]], [[TMP96]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE33]], label [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_BODY22]] +// CHECK1: omp.arraycpy.done34: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP96:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP96]]) +// CHECK1-NEXT: [[TMP105:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP105]]) // CHECK1-NEXT: ret void // // @@ -399,291 +420,296 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i8** noundef [[ARGV:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t.0], align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[_TMP5:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t.1], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x i8*], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP28:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP27:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 // CHECK1-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 9, i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[TMP1]], i64* [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP8]], i64* [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP3]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 0 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP6]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8*, i8** [[TMP7]], i64 9 -// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[TMP8]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint i8* [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint i8* [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: [[TMP12:%.*]] = sdiv exact i64 [[TMP11]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = add nuw i64 [[TMP12]], 1 -// CHECK1-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP15:%.*]] = call i8* @llvm.stacksave() -// CHECK1-NEXT: store i8* [[TMP15]], i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP13]], align 16 -// CHECK1-NEXT: store i64 [[TMP13]], i64* [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[VLA]], [[TMP16]] +// CHECK1-NEXT: store i32 0, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i8**, i8*** [[TMP7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP10]], i64 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP11]], i64 0 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP13]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i8**, i8*** [[TMP7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8*, i8** [[TMP14]], i64 9 +// CHECK1-NEXT: [[TMP15:%.*]] = load i8*, i8** [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP15]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP16:%.*]] = ptrtoint i8* [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint i8* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP18:%.*]] = sub i64 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: [[TMP19:%.*]] = sdiv exact i64 [[TMP18]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP20:%.*]] = add nuw i64 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP22:%.*]] = call i8* @llvm.stacksave() +// CHECK1-NEXT: store i8* [[TMP22]], i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP20]], align 16 +// CHECK1-NEXT: store i64 [[TMP20]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP20]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[VLA]], [[TMP23]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP16]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP23]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP17:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i8*, i8** [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = ptrtoint i8* [[TMP18]] to i64 -// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint i8* [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = sub i64 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: [[TMP22:%.*]] = sdiv exact i64 [[TMP21]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP22]] -// CHECK1-NEXT: store i8** [[_TMP6]], i8*** [[_TMP5]], align 8 -// CHECK1-NEXT: store i8* [[TMP23]], i8** [[_TMP6]], align 8 -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], [2 x %struct.kmp_taskred_input_t.0]* [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0:%.*]], %struct.kmp_taskred_input_t.0* [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP25:%.*]] = bitcast i32* [[ARGC1]] to i8* -// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], %struct.kmp_taskred_input_t.0* [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], %struct.kmp_taskred_input_t.0* [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, i64* [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], %struct.kmp_taskred_input_t.0* [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init..4 to i8*), i8** [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], %struct.kmp_taskred_input_t.0* [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store i8* null, i8** [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], %struct.kmp_taskred_input_t.0* [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb..5 to i8*), i8** [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], %struct.kmp_taskred_input_t.0* [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: [[TMP33:%.*]] = bitcast i32* [[TMP32]] to i8* -// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP33]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], [2 x %struct.kmp_taskred_input_t.0]* [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], %struct.kmp_taskred_input_t.0* [[DOTRD_INPUT_GEP_7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP35:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8*, i8** [[TMP35]], i64 0 -// CHECK1-NEXT: [[TMP36:%.*]] = load i8*, i8** [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, i8* [[TMP36]], i64 0 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = sext i32 [[TMP37]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP38]] -// CHECK1-NEXT: [[TMP39:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8*, i8** [[TMP39]], i64 9 -// CHECK1-NEXT: [[TMP40:%.*]] = load i8*, i8** [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, i8* [[TMP40]], i64 [[LB_ADD_LEN10]] -// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP34]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], %struct.kmp_taskred_input_t.0* [[DOTRD_INPUT_GEP_7]], i32 0, i32 1 -// CHECK1-NEXT: store i8* [[ARRAYIDX9]], i8** [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP42:%.*]] = ptrtoint i8* [[ARRAYIDX12]] to i64 -// CHECK1-NEXT: [[TMP43:%.*]] = ptrtoint i8* [[ARRAYIDX9]] to i64 -// CHECK1-NEXT: [[TMP44:%.*]] = sub i64 [[TMP42]], [[TMP43]] -// CHECK1-NEXT: [[TMP45:%.*]] = sdiv exact i64 [[TMP44]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP46:%.*]] = add nuw i64 [[TMP45]], 1 -// CHECK1-NEXT: [[TMP47:%.*]] = mul nuw i64 [[TMP46]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], %struct.kmp_taskred_input_t.0* [[DOTRD_INPUT_GEP_7]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP47]], i64* [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], %struct.kmp_taskred_input_t.0* [[DOTRD_INPUT_GEP_7]], i32 0, i32 3 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init..6 to i8*), i8** [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], %struct.kmp_taskred_input_t.0* [[DOTRD_INPUT_GEP_7]], i32 0, i32 4 -// CHECK1-NEXT: store i8* null, i8** [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], %struct.kmp_taskred_input_t.0* [[DOTRD_INPUT_GEP_7]], i32 0, i32 5 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb..7 to i8*), i8** [[TMP51]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], %struct.kmp_taskred_input_t.0* [[DOTRD_INPUT_GEP_7]], i32 0, i32 6 -// CHECK1-NEXT: store i32 1, i32* [[TMP52]], align 8 -// CHECK1-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4 -// CHECK1-NEXT: [[TMP55:%.*]] = bitcast [2 x %struct.kmp_taskred_input_t.0]* [[DOTRD_INPUT_]] to i8* -// CHECK1-NEXT: [[TMP56:%.*]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @[[GLOB1]], i32 [[TMP54]], i32 1, i32 2, i8* [[TMP55]]) -// CHECK1-NEXT: store i8* [[TMP56]], i8** [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP58:%.*]] = load i32, i32* [[TMP57]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP58]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP59:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP59]], 9 +// CHECK1-NEXT: [[TMP24:%.*]] = load i8**, i8*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i8*, i8** [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = ptrtoint i8* [[TMP25]] to i64 +// CHECK1-NEXT: [[TMP27:%.*]] = ptrtoint i8* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP28:%.*]] = sub i64 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: [[TMP29:%.*]] = sdiv exact i64 [[TMP28]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP29]] +// CHECK1-NEXT: store i8** [[_TMP5]], i8*** [[_TMP4]], align 8 +// CHECK1-NEXT: store i8* [[TMP30]], i8** [[_TMP5]], align 8 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.1], [2 x %struct.kmp_taskred_input_t.1]* [[DOTRD_INPUT_]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1:%.*]], %struct.kmp_taskred_input_t.1* [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP32:%.*]] = bitcast i32* [[ARGC]] to i8* +// CHECK1-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], %struct.kmp_taskred_input_t.1* [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP6]] to i8* +// CHECK1-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], %struct.kmp_taskred_input_t.1* [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, i64* [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], %struct.kmp_taskred_input_t.1* [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init..4 to i8*), i8** [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], %struct.kmp_taskred_input_t.1* [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store i8* null, i8** [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], %struct.kmp_taskred_input_t.1* [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb..5 to i8*), i8** [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], %struct.kmp_taskred_input_t.1* [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP40:%.*]] = bitcast i32* [[TMP39]] to i8* +// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP40]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.1], [2 x %struct.kmp_taskred_input_t.1]* [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], %struct.kmp_taskred_input_t.1* [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP42:%.*]] = load i8**, i8*** [[TMP7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8*, i8** [[TMP42]], i64 0 +// CHECK1-NEXT: [[TMP43:%.*]] = load i8*, i8** [[ARRAYIDX7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, i8* [[TMP43]], i64 0 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = sext i32 [[TMP44]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP45]] +// CHECK1-NEXT: [[TMP46:%.*]] = load i8**, i8*** [[TMP7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i8*, i8** [[TMP46]], i64 9 +// CHECK1-NEXT: [[TMP47:%.*]] = load i8*, i8** [[ARRAYIDX10]], align 8 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, i8* [[TMP47]], i64 [[LB_ADD_LEN9]] +// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP41]], align 8 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], %struct.kmp_taskred_input_t.1* [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 +// CHECK1-NEXT: store i8* [[ARRAYIDX8]], i8** [[TMP48]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = ptrtoint i8* [[ARRAYIDX11]] to i64 +// CHECK1-NEXT: [[TMP50:%.*]] = ptrtoint i8* [[ARRAYIDX8]] to i64 +// CHECK1-NEXT: [[TMP51:%.*]] = sub i64 [[TMP49]], [[TMP50]] +// CHECK1-NEXT: [[TMP52:%.*]] = sdiv exact i64 [[TMP51]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP53:%.*]] = add nuw i64 [[TMP52]], 1 +// CHECK1-NEXT: [[TMP54:%.*]] = mul nuw i64 [[TMP53]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], %struct.kmp_taskred_input_t.1* [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP54]], i64* [[TMP55]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], %struct.kmp_taskred_input_t.1* [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init..6 to i8*), i8** [[TMP56]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], %struct.kmp_taskred_input_t.1* [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 +// CHECK1-NEXT: store i8* null, i8** [[TMP57]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], %struct.kmp_taskred_input_t.1* [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb..7 to i8*), i8** [[TMP58]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], %struct.kmp_taskred_input_t.1* [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 +// CHECK1-NEXT: store i32 1, i32* [[TMP59]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = load i32, i32* [[TMP60]], align 4 +// CHECK1-NEXT: [[TMP62:%.*]] = bitcast [2 x %struct.kmp_taskred_input_t.1]* [[DOTRD_INPUT_]] to i8* +// CHECK1-NEXT: [[TMP63:%.*]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @[[GLOB1]], i32 [[TMP61]], i32 1, i32 2, i8* [[TMP62]]) +// CHECK1-NEXT: store i8* [[TMP63]], i8** [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP65:%.*]] = load i32, i32* [[TMP64]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP65]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP66:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP66]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP60:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP67:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP60]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP67]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP61]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP68:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP68]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP62:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 [[TMP62]], [[TMP63]] -// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP69:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP70:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP69]], [[TMP70]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP64:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP64]], 1 +// CHECK1-NEXT: [[TMP71:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP71]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL]] // CHECK1-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store i8** [[DOTTASK_RED_]], i8*** [[TMP65]], align 8 -// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[ARGC1]], i32** [[TMP66]], align 8 -// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP68:%.*]] = load i8**, i8*** [[_TMP5]], align 8 -// CHECK1-NEXT: store i8** [[TMP68]], i8*** [[TMP67]], align 8 -// CHECK1-NEXT: [[TMP69:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP70:%.*]] = load i32, i32* [[TMP69]], align 4 -// CHECK1-NEXT: [[TMP71:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP70]], i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP72:%.*]] = bitcast i8* [[TMP71]] to %struct.kmp_task_t_with_privates* -// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP72]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP73]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP75:%.*]] = load i8*, i8** [[TMP74]], align 8 -// CHECK1-NEXT: [[TMP76:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP75]], i8* align 8 [[TMP76]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP72]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP77]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP79:%.*]] = load i8*, i8** [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: store i8* [[TMP79]], i8** [[TMP78]], align 8 -// CHECK1-NEXT: [[TMP80:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP81:%.*]] = load i32, i32* [[TMP80]], align 4 -// CHECK1-NEXT: [[TMP82:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP81]], i8* [[TMP71]]) +// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store i8** [[DOTTASK_RED_]], i8*** [[TMP72]], align 8 +// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[ARGC]], i32** [[TMP73]], align 8 +// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP75:%.*]] = load i8**, i8*** [[_TMP4]], align 8 +// CHECK1-NEXT: store i8** [[TMP75]], i8*** [[TMP74]], align 8 +// CHECK1-NEXT: [[TMP76:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP77:%.*]] = load i32, i32* [[TMP76]], align 4 +// CHECK1-NEXT: [[TMP78:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP77]], i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP79:%.*]] = bitcast i8* [[TMP78]] to %struct.kmp_task_t_with_privates* +// CHECK1-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP79]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP80]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP82:%.*]] = load i8*, i8** [[TMP81]], align 8 +// CHECK1-NEXT: [[TMP83:%.*]] = bitcast %struct.anon.2* [[AGG_CAPTURED]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP82]], i8* align 8 [[TMP83]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP84:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP79]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP85:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP84]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP86:%.*]] = load i8*, i8** [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: store i8* [[TMP86]], i8** [[TMP85]], align 8 +// CHECK1-NEXT: [[TMP87:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP88:%.*]] = load i32, i32* [[TMP87]], align 4 +// CHECK1-NEXT: [[TMP89:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP88]], i8* [[TMP78]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP83:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP83]], 1 -// CHECK1-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP90:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP90]], 1 +// CHECK1-NEXT: store i64 [[ADD13]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP84:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP85:%.*]] = load i32, i32* [[TMP84]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP85]]) -// CHECK1-NEXT: [[TMP86:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP87:%.*]] = load i32, i32* [[TMP86]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP87]], i32 1) -// CHECK1-NEXT: [[TMP88:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP89:%.*]] = bitcast i32* [[ARGC1]] to i8* -// CHECK1-NEXT: store i8* [[TMP89]], i8** [[TMP88]], align 8 -// CHECK1-NEXT: [[TMP90:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP90]], align 8 -// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP92:%.*]] = inttoptr i64 [[TMP13]] to i8* -// CHECK1-NEXT: store i8* [[TMP92]], i8** [[TMP91]], align 8 +// CHECK1-NEXT: [[TMP91:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP92:%.*]] = load i32, i32* [[TMP91]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP92]]) // CHECK1-NEXT: [[TMP93:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP94:%.*]] = load i32, i32* [[TMP93]], align 4 -// CHECK1-NEXT: [[TMP95:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP96:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB4]], i32 [[TMP94]], i32 2, i64 24, i8* [[TMP95]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP96]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP94]], i32 1) +// CHECK1-NEXT: [[TMP95:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP96:%.*]] = bitcast i32* [[ARGC]] to i8* +// CHECK1-NEXT: store i8* [[TMP96]], i8** [[TMP95]], align 8 +// CHECK1-NEXT: [[TMP97:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP97]], align 8 +// CHECK1-NEXT: [[TMP98:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP99:%.*]] = inttoptr i64 [[TMP20]] to i8* +// CHECK1-NEXT: store i8* [[TMP99]], i8** [[TMP98]], align 8 +// CHECK1-NEXT: [[TMP100:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP101:%.*]] = load i32, i32* [[TMP100]], align 4 +// CHECK1-NEXT: [[TMP102:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP103:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB4]], i32 [[TMP101]], i32 2, i64 24, i8* [[TMP102]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP103]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP97:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP98:%.*]] = load i32, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP97]], [[TMP98]] -// CHECK1-NEXT: store i32 [[ADD15]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP99:%.*]] = getelementptr i8, i8* [[ARRAYIDX2]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[ARRAYIDX2]], [[TMP99]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE22:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP104:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP105:%.*]] = load i32, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP104]], [[TMP105]] +// CHECK1-NEXT: store i32 [[ADD14]], i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP106:%.*]] = getelementptr i8, i8* [[ARRAYIDX1]], i64 [[TMP20]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[ARRAYIDX1]], [[TMP106]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi i8* [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP100:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP100]] to i32 -// CHECK1-NEXT: [[TMP101:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP101]] to i32 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV]], [[CONV17]] -// CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK1-NEXT: store i8 [[CONV19]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi i8* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP107:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP107]] to i32 +// CHECK1-NEXT: [[TMP108:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV16:%.*]] = sext i8 [[TMP108]] to i32 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV]], [[CONV16]] +// CHECK1-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK1-NEXT: store i8 [[CONV18]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP99]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done22: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB4]], i32 [[TMP94]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP106]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done21: +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB4]], i32 [[TMP101]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP102:%.*]] = load i32, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP103:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP102]] monotonic, align 4 -// CHECK1-NEXT: [[TMP104:%.*]] = getelementptr i8, i8* [[ARRAYIDX2]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY23:%.*]] = icmp eq i8* [[ARRAYIDX2]], [[TMP104]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY23]], label [[OMP_ARRAYCPY_DONE36:%.*]], label [[OMP_ARRAYCPY_BODY24:%.*]] -// CHECK1: omp.arraycpy.body24: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST25:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT34:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST26:%.*]] = phi i8* [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT33:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP105:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV27:%.*]] = sext i8 [[TMP105]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST26]] monotonic, align 1 +// CHECK1-NEXT: [[TMP109:%.*]] = load i32, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP110:%.*]] = atomicrmw add i32* [[TMP6]], i32 [[TMP109]] monotonic, align 4 +// CHECK1-NEXT: [[TMP111:%.*]] = getelementptr i8, i8* [[ARRAYIDX1]], i64 [[TMP20]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY22:%.*]] = icmp eq i8* [[ARRAYIDX1]], [[TMP111]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY22]], label [[OMP_ARRAYCPY_DONE35:%.*]], label [[OMP_ARRAYCPY_BODY23:%.*]] +// CHECK1: omp.arraycpy.body23: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST24:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT33:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST25:%.*]] = phi i8* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], [[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[TMP112:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 +// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP112]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST25]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP106:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY24]] ], [ [[TMP111:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP106]], i8* [[_TMP28]], align 1 -// CHECK1-NEXT: [[TMP107:%.*]] = load i8, i8* [[_TMP28]], align 1 -// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP107]] to i32 -// CHECK1-NEXT: [[TMP108:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV30:%.*]] = sext i8 [[TMP108]] to i32 -// CHECK1-NEXT: [[ADD31:%.*]] = add nsw i32 [[CONV29]], [[CONV30]] -// CHECK1-NEXT: [[CONV32:%.*]] = trunc i32 [[ADD31]] to i8 -// CHECK1-NEXT: store i8 [[CONV32]], i8* [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP109:%.*]] = load i8, i8* [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP110:%.*]] = cmpxchg i8* [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i8 [[TMP106]], i8 [[TMP109]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP111]] = extractvalue { i8, i1 } [[TMP110]], 0 -// CHECK1-NEXT: [[TMP112:%.*]] = extractvalue { i8, i1 } [[TMP110]], 1 -// CHECK1-NEXT: br i1 [[TMP112]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP113:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY23]] ], [ [[TMP118:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP113]], i8* [[_TMP27]], align 1 +// CHECK1-NEXT: [[TMP114:%.*]] = load i8, i8* [[_TMP27]], align 1 +// CHECK1-NEXT: [[CONV28:%.*]] = sext i8 [[TMP114]] to i32 +// CHECK1-NEXT: [[TMP115:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 +// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP115]] to i32 +// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV28]], [[CONV29]] +// CHECK1-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 +// CHECK1-NEXT: store i8 [[CONV31]], i8* [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP116:%.*]] = load i8, i8* [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP117:%.*]] = cmpxchg i8* [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i8 [[TMP113]], i8 [[TMP116]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP118]] = extractvalue { i8, i1 } [[TMP117]], 0 +// CHECK1-NEXT: [[TMP119:%.*]] = extractvalue { i8, i1 } [[TMP117]], 1 +// CHECK1-NEXT: br i1 [[TMP119]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT33]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT34]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST25]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE35:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT33]], [[TMP104]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_DONE36]], label [[OMP_ARRAYCPY_BODY24]] -// CHECK1: omp.arraycpy.done36: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT33]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST24]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE34:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP111]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_BODY23]] +// CHECK1: omp.arraycpy.done35: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP113:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP113]]) +// CHECK1-NEXT: [[TMP120:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP120]]) // CHECK1-NEXT: ret void // // @@ -793,7 +819,7 @@ // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[TMP_I:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[TMP4_I:%.*]] = alloca i8*, align 8 @@ -807,7 +833,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.2* // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -820,29 +846,29 @@ // CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* // CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* // CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP24:%.*]] = load i8**, i8*** [[TMP23]], align 8 // CHECK1-NEXT: [[TMP25:%.*]] = load i8*, i8** [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP26]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 // CHECK1-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP29]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP31:%.*]] = load i8**, i8*** [[TMP30]], align 8 // CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds i8*, i8** [[TMP31]], i64 9 // CHECK1-NEXT: [[TMP32:%.*]] = load i8*, i8** [[ARRAYIDX2_I]], align 8 @@ -853,10 +879,10 @@ // CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 // CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8 +// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8, !noalias !12 // CHECK1-NEXT: [[TMP39:%.*]] = load i8*, i8** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP40:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP39]], i8* [[TMP25]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP42:%.*]] = load i8**, i8*** [[TMP41]], align 8 // CHECK1-NEXT: [[TMP43:%.*]] = load i8*, i8** [[TMP42]], align 8 // CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint i8* [[TMP43]] to i64 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp @@ -333,83 +333,101 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP18]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -419,64 +437,70 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -484,83 +508,101 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP18]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -570,64 +612,70 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -635,83 +683,101 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP18]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -721,85 +787,91 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[CONV2]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP13]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -807,83 +879,101 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP18]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -893,55 +983,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -956,83 +1052,101 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP18]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1042,55 +1156,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -1247,81 +1367,99 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1331,61 +1469,67 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1393,81 +1537,99 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1477,61 +1639,67 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1539,81 +1707,99 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1623,80 +1809,86 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP7]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP15]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP21]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1704,81 +1896,99 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1788,52 +1998,58 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP18]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK3-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: @@ -1848,81 +2064,99 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1932,52 +2166,58 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP18]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK3-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK3: omp.inner.for.end: @@ -2134,83 +2374,101 @@ // CHECK5-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP18]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2220,64 +2478,70 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK5-NEXT: ret void // // @@ -2285,83 +2549,101 @@ // CHECK5-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP18]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2371,64 +2653,70 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK5-NEXT: ret void // // @@ -2436,83 +2724,101 @@ // CHECK5-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP18]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2522,85 +2828,91 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[CONV2]] // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP13]] to i32 // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK5-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK5-NEXT: ret void // // @@ -2608,83 +2920,101 @@ // CHECK5-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP18]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2694,55 +3024,61 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK5-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -2757,83 +3093,101 @@ // CHECK5-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP18]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2843,55 +3197,61 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK5-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -3048,81 +3408,99 @@ // CHECK7-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3132,61 +3510,67 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP17]] // CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK7-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK7-NEXT: ret void // // @@ -3194,81 +3578,99 @@ // CHECK7-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3278,61 +3680,67 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP17]] // CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK7-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK7-NEXT: ret void // // @@ -3340,81 +3748,99 @@ // CHECK7-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3424,80 +3850,86 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK7-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ [[TMP7]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK7-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP15]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP21]] // CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK7-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK7-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK7-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK7-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK7: omp.dispatch.end: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK7-NEXT: ret void // // @@ -3505,81 +3937,99 @@ // CHECK7-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3589,52 +4039,58 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK7-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK7-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP18]] // CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK7: omp.inner.for.end: @@ -3649,81 +4105,99 @@ // CHECK7-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3733,52 +4207,58 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK7-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK7-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP18]] // CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK7: omp.inner.for.end: @@ -4174,29 +4654,31 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4206,95 +4688,110 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK13-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP25]], i64* [[TMP24]], align 8 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP27]], i64* [[TMP26]], align 8 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[TMP30]], align 8 +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store i32* [[TMP6]], i32** [[TMP31]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK13-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4304,87 +4801,94 @@ // CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK13-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 +// CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK13-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] +// CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) +// CHECK13-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void @@ -4396,29 +4900,31 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4428,95 +4934,110 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK13-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP25]], i64* [[TMP24]], align 8 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP27]], i64* [[TMP26]], align 8 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[TMP30]], align 8 +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store i32* [[TMP6]], i32** [[TMP31]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK13-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4526,87 +5047,94 @@ // CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK13-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 +// CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK13-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] +// CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) +// CHECK13-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void @@ -4619,8 +5147,7 @@ // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 @@ -4629,250 +5156,273 @@ // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK13-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK13-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK13-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK13-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK13-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK13-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] +// CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK13-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] -// CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] +// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK13-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]]) +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK13-NEXT: store i64 [[TMP24]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK13-NEXT: store i64 [[TMP26]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP28]], i64* [[TMP27]], align 8 +// CHECK13-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP30]], i64* [[TMP29]], align 8 +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP32]], i32* [[TMP31]], align 8 +// CHECK13-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[TMP33]], align 8 +// CHECK13-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: store i32* [[TMP6]], i32** [[TMP34]], align 8 +// CHECK13-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP36]], i32* [[TMP35]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK13-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK13-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK13-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK13-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] -// CHECK13-NEXT: br i1 [[CMP13]], label [[COND_TRUE14:%.*]], label [[COND_FALSE15:%.*]] -// CHECK13: cond.true14: -// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK13-NEXT: br label [[COND_END16:%.*]] -// CHECK13: cond.false15: -// CHECK13-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: br label [[COND_END16]] -// CHECK13: cond.end16: -// CHECK13-NEXT: [[COND17:%.*]] = phi i32 [ [[TMP32]], [[COND_TRUE14]] ], [ [[TMP33]], [[COND_FALSE15]] ] -// CHECK13-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP34]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] +// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP43]], [[TMP44]] +// CHECK13-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] +// CHECK13: cond.true11: +// CHECK13-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: br label [[COND_END13:%.*]] +// CHECK13: cond.false12: +// CHECK13-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: br label [[COND_END13]] +// CHECK13: cond.end13: +// CHECK13-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP45]], [[COND_TRUE11]] ], [ [[TMP46]], [[COND_FALSE12]] ] +// CHECK13-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP47]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK13-NEXT: [[TMP48:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP49:%.*]] = load i32, i32* [[TMP48]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP49]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[I7:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK13-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 6 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 8 +// CHECK13-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK13-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK13-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK13-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK13-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 +// CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK13-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK13-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] +// CHECK13-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK13-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I7]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[I7]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK13-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) +// CHECK13-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void @@ -4884,29 +5434,31 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4916,95 +5468,110 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK13-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP25]], i64* [[TMP24]], align 8 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP27]], i64* [[TMP26]], align 8 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[TMP30]], align 8 +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store i32* [[TMP6]], i32** [[TMP31]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK13-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5014,77 +5581,84 @@ // CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK13-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 +// CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP11]], i32 35, i32 [[TMP8]], i32 [[TMP9]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP20]], i32 35, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP22]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !15 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !15 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK13-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -5103,8 +5677,7 @@ // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 @@ -5113,216 +5686,239 @@ // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK13-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK13-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK13-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK13-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK13-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] -// CHECK13-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +// CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV8]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK13-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP21]], i32* [[CONV9]], align 4 -// CHECK13-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP22]]) +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK13-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP27]], i64* [[TMP26]], align 8 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP29]], i64* [[TMP28]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, i32* [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP31]], i32* [[TMP30]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[TMP32]], align 8 +// CHECK13-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: store i32* [[TMP6]], i32** [[TMP33]], align 8 +// CHECK13-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP35]], i32* [[TMP34]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) +// CHECK13-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[I7:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK13-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 6 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 8 +// CHECK13-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK13-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK13-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK13-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK13-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 +// CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 [[TMP8]]) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP23]], i32 35, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK13-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP25]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] +// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I7]], align 4, !llvm.access.group !18 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[I7]], align 4, !llvm.access.group !18 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -5491,83 +6087,101 @@ // CHECK13-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.12*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK13-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.13*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5577,63 +6191,69 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK13-NEXT: ret void // // @@ -5641,83 +6261,101 @@ // CHECK13-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK13-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.15*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.15* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.15*, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5727,63 +6365,69 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.15* [[__CONTEXT]], %struct.anon.15** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.15*, %struct.anon.15** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], %struct.anon.15* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK13-NEXT: ret void // // @@ -5792,99 +6436,114 @@ // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 8 // CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.16*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.16* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.16*, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK13-NEXT: store %struct.anon.16* [[__CONTEXT]], %struct.anon.16** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], %struct.anon.16* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK13-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]) +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK13-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP19]], i64* [[TMP18]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP20]], align 8 +// CHECK13-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.18*)* @.omp_outlined..27 to void (i32*, i32*, ...)*), %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.18* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.18*, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5894,87 +6553,94 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK13-NEXT: store %struct.anon.18* [[__CONTEXT]], %struct.anon.18** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.18*, %struct.anon.18** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], %struct.anon.18* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK13-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[CONV2]] // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK13-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK13-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP13]]) // CHECK13-NEXT: ret void // // @@ -5982,83 +6648,101 @@ // CHECK13-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 8 // CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.20*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.20* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.20*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.20* [[__CONTEXT]], %struct.anon.20** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.20*, %struct.anon.20** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], %struct.anon.20* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK13-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.21*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.21* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.21*, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6068,54 +6752,60 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.21* [[__CONTEXT]], %struct.anon.21** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.21*, %struct.anon.21** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], %struct.anon.21* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK13-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK13-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -6131,99 +6821,114 @@ // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 8 // CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.22*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.22* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.22*, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK13-NEXT: store %struct.anon.22* [[__CONTEXT]], %struct.anon.22** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.22*, %struct.anon.22** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], %struct.anon.22* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK13-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]) +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK13-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP19]], i64* [[TMP18]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP20]], align 8 +// CHECK13-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.24*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.24* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.24*, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6233,58 +6938,65 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK13-NEXT: store %struct.anon.24* [[__CONTEXT]], %struct.anon.24** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.24*, %struct.anon.24** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_24:%.*]], %struct.anon.24* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK13-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 35, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32 35, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK13-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK13-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -6676,27 +7388,30 @@ // CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -6706,91 +7421,108 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], i32* [[TMP22]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[TMP28]], align 4 +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store i32* [[TMP6]], i32** [[TMP29]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK15-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -6803,80 +7535,88 @@ // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP27]] // CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK15-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) +// CHECK15-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void @@ -6888,27 +7628,30 @@ // CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: -// CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -6918,91 +7661,108 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], i32* [[TMP22]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[TMP28]], align 4 +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store i32* [[TMP6]], i32** [[TMP29]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK15-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7015,80 +7775,88 @@ // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP27]] // CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK15-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) +// CHECK15-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void @@ -7101,33 +7869,35 @@ // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -7137,122 +7907,140 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK15-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK15-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP18]], i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]]) +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP24]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP26]], i32* [[TMP25]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP28]], i32* [[TMP27]], align 4 +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP30]], i32* [[TMP29]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[TMP31]], align 4 +// CHECK15-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store i32* [[TMP6]], i32** [[TMP32]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP34]], i32* [[TMP33]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK15-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK15-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK15-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK15-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK15-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK15-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP28]], [[TMP29]] +// CHECK15-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK15-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK15: cond.true11: -// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: br label [[COND_END13:%.*]] // CHECK15: cond.false12: -// CHECK15-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END13]] // CHECK15: cond.end13: -// CHECK15-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP30]], [[COND_TRUE11]] ], [ [[TMP31]], [[COND_FALSE12]] ] +// CHECK15-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE11]] ], [ [[TMP44]], [[COND_FALSE12]] ] // CHECK15-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP32]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP45]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK15-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -7265,81 +8053,91 @@ // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK15-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP29]] // CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK15-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) +// CHECK15-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void @@ -7351,27 +8149,30 @@ // CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7381,91 +8182,108 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], i32* [[TMP22]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[TMP28]], align 4 +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store i32* [[TMP6]], i32** [[TMP29]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK15-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7478,69 +8296,77 @@ // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP11]], i32 35, i32 [[TMP8]], i32 [[TMP9]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP20]], i32 35, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP22]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK15-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !16 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP19]] +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP28]] // CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK15-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -7560,33 +8386,35 @@ // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -7596,97 +8424,115 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK15-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP20]]) +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, i32* [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[TMP30]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store i32* [[TMP6]], i32** [[TMP31]], align 4 +// CHECK15-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP33]], i32* [[TMP32]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK15-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP36]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP37]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -7699,71 +8545,81 @@ // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 [[TMP8]]) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP23]], i32 35, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP25]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !19 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP20]] +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP31]] // CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK15-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -7931,81 +8787,99 @@ // CHECK15-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8015,60 +8889,66 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP17]] // CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK15-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK15-NEXT: ret void // // @@ -8076,81 +8956,99 @@ // CHECK15-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.11*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], %struct.anon.11* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.12*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8160,60 +9058,66 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP17]] // CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK15-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK15-NEXT: ret void // // @@ -8222,93 +9126,111 @@ // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 4 // CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.13*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]) +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], i32* [[TMP16]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], i32* [[TMP19]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..27 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8318,81 +9240,89 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK15-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP16]] +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP24]] // CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK15-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK15: omp.dispatch.inc: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK15-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK15-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK15: omp.dispatch.end: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP13]]) // CHECK15-NEXT: ret void // // @@ -8400,81 +9330,99 @@ // CHECK15-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.15*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.15* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.15*, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon.15* [[__CONTEXT]], %struct.anon.15** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.15*, %struct.anon.15** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], %struct.anon.15* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.16*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.16* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.16*, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8484,51 +9432,57 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon.16* [[__CONTEXT]], %struct.anon.16** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], %struct.anon.16* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK15-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !22 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP12]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP18]] // CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -8544,93 +9498,111 @@ // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.17*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.17* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.17*, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon.17* [[__CONTEXT]], %struct.anon.17** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.17*, %struct.anon.17** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], %struct.anon.17* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]) +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], i32* [[TMP16]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], i32* [[TMP19]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.18*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.18* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.18*, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8640,53 +9612,61 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon.18* [[__CONTEXT]], %struct.anon.18** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.18*, %struct.anon.18** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], %struct.anon.18* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 35, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32 35, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK15-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP13]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP21]] // CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -9082,29 +10062,31 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9114,95 +10096,110 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 -// CHECK17-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK17-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP25]], i64* [[TMP24]], align 8 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP27]], i64* [[TMP26]], align 8 +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[TMP30]], align 8 +// CHECK17-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i32* [[TMP6]], i32** [[TMP31]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK17-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9212,87 +10209,94 @@ // CHECK17-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 +// CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK17-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] +// CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) +// CHECK17-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void @@ -9304,29 +10308,31 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9336,95 +10342,110 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 -// CHECK17-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK17-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP25]], i64* [[TMP24]], align 8 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP27]], i64* [[TMP26]], align 8 +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[TMP30]], align 8 +// CHECK17-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i32* [[TMP6]], i32** [[TMP31]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK17-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9434,87 +10455,94 @@ // CHECK17-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 +// CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK17-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] +// CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) +// CHECK17-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void @@ -9527,8 +10555,7 @@ // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 @@ -9537,250 +10564,273 @@ // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 +// CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK17-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK17-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK17-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK17-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] +// CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK17-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] -// CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] +// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK17-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]]) +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK17-NEXT: store i64 [[TMP24]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK17-NEXT: store i64 [[TMP26]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP28]], i64* [[TMP27]], align 8 +// CHECK17-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP30]], i64* [[TMP29]], align 8 +// CHECK17-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, i32* [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP32]], i32* [[TMP31]], align 8 +// CHECK17-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[TMP33]], align 8 +// CHECK17-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: store i32* [[TMP6]], i32** [[TMP34]], align 8 +// CHECK17-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP36]], i32* [[TMP35]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK17-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK17-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK17-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK17-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] -// CHECK17-NEXT: br i1 [[CMP13]], label [[COND_TRUE14:%.*]], label [[COND_FALSE15:%.*]] -// CHECK17: cond.true14: -// CHECK17-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK17-NEXT: br label [[COND_END16:%.*]] -// CHECK17: cond.false15: -// CHECK17-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: br label [[COND_END16]] -// CHECK17: cond.end16: -// CHECK17-NEXT: [[COND17:%.*]] = phi i32 [ [[TMP32]], [[COND_TRUE14]] ], [ [[TMP33]], [[COND_FALSE15]] ] -// CHECK17-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP34]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] +// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] +// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP43]], [[TMP44]] +// CHECK17-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] +// CHECK17: cond.true11: +// CHECK17-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: br label [[COND_END13:%.*]] +// CHECK17: cond.false12: +// CHECK17-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: br label [[COND_END13]] +// CHECK17: cond.end13: +// CHECK17-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP45]], [[COND_TRUE11]] ], [ [[TMP46]], [[COND_FALSE12]] ] +// CHECK17-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP47]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK17-NEXT: [[TMP48:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP49:%.*]] = load i32, i32* [[TMP48]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP49]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[I7:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 8 +// CHECK17-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK17-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK17-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK17-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 +// CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK17-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK17-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] +// CHECK17-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK17-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I7]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[I7]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK17-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void @@ -9792,29 +10842,31 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9824,95 +10876,110 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 -// CHECK17-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK17-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP25]], i64* [[TMP24]], align 8 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP27]], i64* [[TMP26]], align 8 +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[TMP30]], align 8 +// CHECK17-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i32* [[TMP6]], i32** [[TMP31]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK17-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9922,77 +10989,84 @@ // CHECK17-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 +// CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP11]], i32 1073741859, i32 [[TMP8]], i32 [[TMP9]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP20]], i32 1073741859, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP22]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !15 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !15 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !15 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !15 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK17-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -10011,8 +11085,7 @@ // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 @@ -10021,216 +11094,239 @@ // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK17-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK17-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK17-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] -// CHECK17-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +// CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP19]], i32* [[CONV8]], align 4 -// CHECK17-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK17-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP21]], i32* [[CONV9]], align 4 -// CHECK17-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP22]]) +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK17-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP27]], i64* [[TMP26]], align 8 +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP29]], i64* [[TMP28]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, i32* [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP31]], i32* [[TMP30]], align 8 +// CHECK17-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[TMP32]], align 8 +// CHECK17-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: store i32* [[TMP6]], i32** [[TMP33]], align 8 +// CHECK17-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP35]], i32* [[TMP34]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK17-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) +// CHECK17-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[I7:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 8 +// CHECK17-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK17-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK17-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK17-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 +// CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 [[TMP8]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP23]], i32 1073741859, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP25]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK17-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] +// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I7]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[I7]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -10399,83 +11495,101 @@ // CHECK17-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.12*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK17-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.13*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10485,63 +11599,69 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK17-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK17-NEXT: ret void // // @@ -10549,83 +11669,101 @@ // CHECK17-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK17-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.15*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.15* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.15*, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10635,63 +11773,69 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.15* [[__CONTEXT]], %struct.anon.15** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.15*, %struct.anon.15** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], %struct.anon.15* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK17-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK17-NEXT: ret void // // @@ -10700,99 +11844,114 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 8 // CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.16*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.16* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.16*, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK17-NEXT: store %struct.anon.16* [[__CONTEXT]], %struct.anon.16** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], %struct.anon.16* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK17-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]) +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK17-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP19]], i64* [[TMP18]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP20]], align 8 +// CHECK17-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.18*)* @.omp_outlined..27 to void (i32*, i32*, ...)*), %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.18* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.18*, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10802,87 +11961,94 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK17-NEXT: store %struct.anon.18* [[__CONTEXT]], %struct.anon.18** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.18*, %struct.anon.18** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], %struct.anon.18* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK17-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK17-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[CONV2]] // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK17-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] +// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP13]]) // CHECK17-NEXT: ret void // // @@ -10890,83 +12056,101 @@ // CHECK17-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 8 // CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.20*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.20* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.20*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.20* [[__CONTEXT]], %struct.anon.20** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.20*, %struct.anon.20** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], %struct.anon.20* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK17-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.21*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.21* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.21*, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10976,54 +12160,60 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.21* [[__CONTEXT]], %struct.anon.21** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.21*, %struct.anon.21** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], %struct.anon.21* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK17-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK17-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -11039,99 +12229,114 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 8 // CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.22*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.22* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.22*, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK17-NEXT: store %struct.anon.22* [[__CONTEXT]], %struct.anon.22** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.22*, %struct.anon.22** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], %struct.anon.22* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK17-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]) +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK17-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP19]], i64* [[TMP18]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP20]], align 8 +// CHECK17-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.24*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.24* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.24*, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11141,58 +12346,65 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK17-NEXT: store %struct.anon.24* [[__CONTEXT]], %struct.anon.24** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.24*, %struct.anon.24** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_24:%.*]], %struct.anon.24* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK17-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK17-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 1073741859, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32 1073741859, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK17-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK17-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK17-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -11584,27 +12796,30 @@ // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11614,91 +12829,108 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], i32* [[TMP22]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[TMP28]], align 4 +// CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store i32* [[TMP6]], i32** [[TMP29]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK19-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11711,80 +12943,88 @@ // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP27]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK19-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) +// CHECK19-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void @@ -11796,27 +13036,30 @@ // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11826,91 +13069,108 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], i32* [[TMP22]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[TMP28]], align 4 +// CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store i32* [[TMP6]], i32** [[TMP29]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK19-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11923,80 +13183,88 @@ // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP27]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK19-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) +// CHECK19-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void @@ -12009,33 +13277,35 @@ // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -12045,122 +13315,140 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP18]], i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]]) +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP24]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP26]], i32* [[TMP25]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP28]], i32* [[TMP27]], align 4 +// CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP30]], i32* [[TMP29]], align 4 +// CHECK19-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[TMP31]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store i32* [[TMP6]], i32** [[TMP32]], align 4 +// CHECK19-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP34]], i32* [[TMP33]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK19-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK19-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK19-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK19-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK19-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP28]], [[TMP29]] +// CHECK19-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK19-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK19: cond.true11: -// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END13:%.*]] // CHECK19: cond.false12: -// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END13]] // CHECK19: cond.end13: -// CHECK19-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP30]], [[COND_TRUE11]] ], [ [[TMP31]], [[COND_FALSE12]] ] +// CHECK19-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE11]] ], [ [[TMP44]], [[COND_FALSE12]] ] // CHECK19-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP32]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP45]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK19-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -12173,81 +13461,91 @@ // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP29]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK19-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) +// CHECK19-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void @@ -12259,27 +13557,30 @@ // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12289,91 +13590,108 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], i32* [[TMP22]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[TMP28]], align 4 +// CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store i32* [[TMP6]], i32** [[TMP29]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK19-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12383,72 +13701,80 @@ // CHECK19-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP11]], i32 1073741859, i32 [[TMP8]], i32 [[TMP9]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP20]], i32 1073741859, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP22]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK19-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !16 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP19]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP28]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK19-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -12468,33 +13794,35 @@ // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -12504,97 +13832,115 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP20]]) +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[TMP30]], align 4 +// CHECK19-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store i32* [[TMP6]], i32** [[TMP31]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP33]], i32* [[TMP32]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK19-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP36]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP37]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -12607,71 +13953,81 @@ // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 [[TMP8]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP23]], i32 1073741859, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP25]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP20]] +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP31]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK19-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -12839,81 +14195,99 @@ // CHECK19-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12923,60 +14297,66 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP17]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK19-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK19-NEXT: ret void // // @@ -12984,81 +14364,99 @@ // CHECK19-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.11*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], %struct.anon.11* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.12*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -13068,60 +14466,66 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP17]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK19-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK19-NEXT: ret void // // @@ -13130,93 +14534,111 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 4 // CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.13*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]) +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], i32* [[TMP16]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], i32* [[TMP19]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..27 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -13226,81 +14648,89 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP16]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP24]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK19-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK19-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK19-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP13]]) // CHECK19-NEXT: ret void // // @@ -13308,81 +14738,99 @@ // CHECK19-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.15*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.15* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.15*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.15* [[__CONTEXT]], %struct.anon.15** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.15*, %struct.anon.15** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], %struct.anon.15* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.16*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.16* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.16*, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -13392,51 +14840,57 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.16* [[__CONTEXT]], %struct.anon.16** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], %struct.anon.16* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK19-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !22 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP12]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP18]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK19-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -13452,93 +14906,111 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.17*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.17* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.17*, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.17* [[__CONTEXT]], %struct.anon.17** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.17*, %struct.anon.17** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], %struct.anon.17* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]) +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], i32* [[TMP16]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], i32* [[TMP19]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.18*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.18* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.18*, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -13548,53 +15020,61 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.18* [[__CONTEXT]], %struct.anon.18** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.18*, %struct.anon.18** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], %struct.anon.18* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 1073741859, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32 1073741859, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK19-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP13]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP21]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK19-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK19: omp.inner.for.end: diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp @@ -248,8 +248,7 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[I_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) // CHECK1-NEXT: store i64 [[I]], i64* [[I_ADDR]], align 8 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 @@ -264,126 +263,139 @@ // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[I_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV6]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[I_CASTED]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV3]], align 4 -// CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[CONV7]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [1000 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP5]], i64 [[TMP7]], [1000 x i32]* [[TMP1]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [1000 x i32]* [[TMP1]], [1000 x i32]** [[TMP8]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[I:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[I]], i64* [[I_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[I_ADDR]] to i32* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[N]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[I4]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[I3]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: -// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP6]], i64 0, i64 0 // CHECK1-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[ARRAYDECAY]], i64 8) ] // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[I_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[I_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, [1000 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], [1000 x i32]* [[TMP0]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP25]], i64* [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP27]], i64* [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP31]], i32* [[TMP30]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store [1000 x i32]* [[TMP6]], [1000 x i32]** [[TMP32]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK1-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK1-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP28]], 0 -// CHECK1-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV11]], 1 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP39]], 0 +// CHECK1-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD9]], i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -392,132 +404,140 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[I:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTLINEAR_STEP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I7:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I8:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I6:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[I]], i64* [[I_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[I_ADDR]] to i32* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[N]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[I4]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[I3]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: -// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP10]], i64 0, i64 0 // CHECK1-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[ARRAYDECAY]], i64 8) ] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTLINEAR_START]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTLINEAR_STEP]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTLINEAR_START]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTLINEAR_STEP]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP20]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP22]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP25]], [[COND_TRUE]] ], [ [[TMP26]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] +// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I7]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I7]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP10]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK1-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP27]], 0 -// CHECK1-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 -// CHECK1-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] -// CHECK1-NEXT: store i32 [[ADD15]], i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK1-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 +// CHECK1-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] +// CHECK1-NEXT: store i32 [[ADD13]], i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK1-NEXT: br i1 [[TMP29]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK1-NEXT: br i1 [[TMP39]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: @@ -532,29 +552,31 @@ // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 // CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 // CHECK1-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[G_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, [1000 x i32]*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP2]], [1000 x i32]* [[TMP0]], i32* [[TMP3]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store [1000 x i32]* [[TMP0]], [1000 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[G_ADDR]], align 8 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[TMP4]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -564,90 +586,106 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32*, i32** [[G_ADDR]], align 8, !llvm.access.group !11 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], [1000 x i32]* [[TMP0]], i32* [[TMP20]]), !llvm.access.group !11 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK1-NEXT: store i64 [[TMP20]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK1-NEXT: store i64 [[TMP22]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: store i64 [[TMP24]], i64* [[TMP23]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: store i64 [[TMP26]], i64* [[TMP25]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: store i32 [[TMP28]], i32* [[TMP27]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store [1000 x i32]* [[TMP4]], [1000 x i32]** [[TMP29]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[TMP5]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: store i32* [[TMP31]], i32** [[TMP30]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 -// CHECK1-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[I3]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK1-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[I3]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -656,15 +694,14 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -674,99 +711,106 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 8, !llvm.access.group !15 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i64 0 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX8]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP9]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 0 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP8]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP27]], i32* [[ARRAYIDX7]], align 4, !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP31]]) +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK1-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK1-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 -// CHECK1-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[I5]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK1-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 +// CHECK1-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[I4]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -954,8 +998,7 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) // CHECK3-NEXT: store i32 [[I]], i32* [[I_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 @@ -966,24 +1009,26 @@ // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[I_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[I_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[I_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [1000 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32 [[TMP5]], i32 [[TMP7]], [1000 x i32]* [[TMP1]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[I_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [1000 x i32]* [[TMP1]], [1000 x i32]** [[TMP8]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[I:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -993,91 +1038,108 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[I]], i32* [[I_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[N]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I3]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: -// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP6]], i32 0, i32 0 // CHECK3-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[ARRAYDECAY]], i32 8) ] // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[I_CASTED]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I_CASTED]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP18]], i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, [1000 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], i32 [[TMP19]], [1000 x i32]* [[TMP0]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], i32* [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK3-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store [1000 x i32]* [[TMP6]], [1000 x i32]** [[TMP30]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK3-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK3-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP37]], 0 // CHECK3-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD9]], i32* [[I_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[ADD9]], i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: br label [[OMP_PRECOND_END]] @@ -1086,15 +1148,15 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[I:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1110,103 +1172,113 @@ // CHECK3-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[I]], i32* [[I_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[N]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I3]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: -// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP10]], i32 0, i32 0 // CHECK3-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[ARRAYDECAY]], i32 8) ] -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[I_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTLINEAR_START]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTLINEAR_STEP]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTLINEAR_STEP]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP20]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP22]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP25]], [[COND_TRUE]] ], [ [[TMP26]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP21]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP10]], i32 0, i32 [[TMP31]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK3-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP37]], 0 // CHECK3-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 // CHECK3-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 // CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] -// CHECK3-NEXT: store i32 [[ADD12]], i32* [[I_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[ADD12]], i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK3-NEXT: br i1 [[TMP39]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK3: .omp.linear.pu: // CHECK3-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK3: .omp.linear.pu.done: @@ -1221,27 +1293,30 @@ // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 // CHECK3-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 // CHECK3-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[G_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [1000 x i32]*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32 [[TMP2]], [1000 x i32]* [[TMP0]], i32* [[TMP3]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store [1000 x i32]* [[TMP0]], [1000 x i32]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[G_ADDR]], align 4 +// CHECK3-NEXT: store i32* [[TMP5]], i32** [[TMP4]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1251,82 +1326,100 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32*, i32** [[G_ADDR]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], [1000 x i32]* [[TMP0]], i32* [[TMP18]]), !llvm.access.group !12 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store i32 [[TMP24]], i32* [[TMP23]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store i32 [[TMP26]], i32* [[TMP25]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store [1000 x i32]* [[TMP4]], [1000 x i32]** [[TMP27]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP5]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store i32* [[TMP29]], i32** [[TMP28]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !12 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK3-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -1339,15 +1432,14 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1360,88 +1452,96 @@ // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 0 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] -// CHECK3-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX6]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP9]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i32 0 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP8]], i32 0, i32 [[TMP28]] +// CHECK3-NEXT: store i32 [[TMP27]], i32* [[ARRAYIDX6]], align 4, !llvm.access.group !16 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP31]]) +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK3-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP34]], 0 // CHECK3-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK3-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -1770,8 +1870,7 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[I_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4:[0-9]+]]) // CHECK9-NEXT: store i64 [[I]], i64* [[I_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 @@ -1786,126 +1885,139 @@ // CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[I_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[CONV6]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[I_CASTED]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV3]], align 4 -// CHECK9-NEXT: [[CONV7:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[CONV7]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [1000 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP5]], i64 [[TMP7]], [1000 x i32]* [[TMP1]]) +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [1000 x i32]* [[TMP1]], [1000 x i32]** [[TMP8]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[I:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[I]], i64* [[I_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK9-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[I_ADDR]] to i32* -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[I]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[N]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK9-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 0, i32* [[I4]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[I3]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: -// CHECK9-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 0 +// CHECK9-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP6]], i64 0, i64 0 // CHECK9-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[ARRAYDECAY]], i64 8) ] // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] -// CHECK9-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK9-NEXT: [[CONV8:%.*]] = bitcast i64* [[I_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[I_CASTED]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[CONV9:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, [1000 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], [1000 x i32]* [[TMP0]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP25]], i64* [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP27]], i64* [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP31]], i32* [[TMP30]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store [1000 x i32]* [[TMP6]], [1000 x i32]** [[TMP32]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK9-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK9-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP28]], 0 -// CHECK9-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV11]], 1 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD12]], i32* [[CONV]], align 4 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP39]], 0 +// CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] +// CHECK9-NEXT: store i32 [[ADD9]], i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -1914,132 +2026,140 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[I:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTLINEAR_STEP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I7:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I8:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I6:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[I]], i64* [[I_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK9-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[I_ADDR]] to i32* -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], i32* [[I]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: store i32 [[TMP8]], i32* [[N]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK9-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 0, i32* [[I4]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[I3]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: -// CHECK9-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 0 +// CHECK9-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP10]], i64 0, i64 0 // CHECK9-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[ARRAYDECAY]], i64 8) ] -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTLINEAR_START]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTLINEAR_STEP]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTLINEAR_START]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTLINEAR_STEP]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 +// CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP20]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP22]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP25]], [[COND_TRUE]] ], [ [[TMP26]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I7]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I7]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP10]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK9-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK9-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP27]], 0 -// CHECK9-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 -// CHECK9-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 -// CHECK9-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] -// CHECK9-NEXT: store i32 [[ADD15]], i32* [[CONV]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK9-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 +// CHECK9-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 +// CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] +// CHECK9-NEXT: store i32 [[ADD13]], i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK9-NEXT: br i1 [[TMP29]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK9-NEXT: br i1 [[TMP39]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK9: .omp.linear.pu: // CHECK9-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK9: .omp.linear.pu.done: @@ -2054,29 +2174,31 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 // CHECK9-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[G_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, [1000 x i32]*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP2]], [1000 x i32]* [[TMP0]], i32* [[TMP3]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store [1000 x i32]* [[TMP0]], [1000 x i32]** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32*, i32** [[G_ADDR]], align 8 +// CHECK9-NEXT: store i32* [[TMP5]], i32** [[TMP4]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK9-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2086,90 +2208,106 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK9-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 -// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 -// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !12 -// CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4, !llvm.access.group !12 -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !12 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[G_ADDR]], align 8, !llvm.access.group !12 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], [1000 x i32]* [[TMP0]], i32* [[TMP20]]), !llvm.access.group !12 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK9-NEXT: store i64 [[TMP20]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !12 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK9-NEXT: store i64 [[TMP22]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !12 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !12 +// CHECK9-NEXT: store i64 [[TMP24]], i64* [[TMP23]], align 8, !llvm.access.group !12 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !12 +// CHECK9-NEXT: store i64 [[TMP26]], i64* [[TMP25]], align 8, !llvm.access.group !12 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !12 +// CHECK9-NEXT: store i32 [[TMP28]], i32* [[TMP27]], align 8, !llvm.access.group !12 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store [1000 x i32]* [[TMP4]], [1000 x i32]** [[TMP29]], align 8, !llvm.access.group !12 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32*, i32** [[TMP5]], align 8, !llvm.access.group !12 +// CHECK9-NEXT: store i32* [[TMP31]], i32** [[TMP30]], align 8, !llvm.access.group !12 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !12 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 -// CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD9]], i32* [[I3]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[I3]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -2178,15 +2316,14 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK9-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2196,99 +2333,106 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK9-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !16 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 8, !llvm.access.group !16 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i64 0 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !16 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX8]], align 4, !llvm.access.group !16 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !16 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP9]], align 8, !llvm.access.group !16 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 0 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !16 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP8]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP27]], i32* [[ARRAYIDX7]], align 4, !llvm.access.group !16 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP31]]) +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK9-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK9-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 -// CHECK9-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 -// CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK9-NEXT: store i32 [[ADD13]], i32* [[I5]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK9-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 +// CHECK9-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK9-NEXT: store i32 [[ADD12]], i32* [[I4]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -2304,8 +2448,7 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4:[0-9]+]]) // CHECK11-NEXT: store i32 [[I]], i32* [[I_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 @@ -2316,24 +2459,26 @@ // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[I_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[I_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[I_CASTED]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [1000 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32 [[TMP5]], i32 [[TMP7]], [1000 x i32]* [[TMP1]]) +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[I_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [1000 x i32]* [[TMP1]], [1000 x i32]** [[TMP8]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[I:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2343,91 +2488,108 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[I]], i32* [[I_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[I]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[N]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: -// CHECK11-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP6]], i32 0, i32 0 // CHECK11-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[ARRAYDECAY]], i32 8) ] // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[I_CASTED]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[I_CASTED]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP18]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, [1000 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], i32 [[TMP19]], [1000 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK11-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store [1000 x i32]* [[TMP6]], [1000 x i32]** [[TMP30]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK11-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK11-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP37]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD9]], i32* [[I_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[ADD9]], i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: br label [[OMP_PRECOND_END]] @@ -2436,15 +2598,15 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[I:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2460,103 +2622,113 @@ // CHECK11-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[I]], i32* [[I_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[I]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[N]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: -// CHECK11-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP10]], i32 0, i32 0 // CHECK11-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[ARRAYDECAY]], i32 8) ] -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[I_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTLINEAR_START]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTLINEAR_STEP]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTLINEAR_START]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTLINEAR_STEP]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP20]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP22]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP25]], [[COND_TRUE]] ], [ [[TMP26]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP21]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP10]], i32 0, i32 [[TMP31]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK11-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP37]], 0 // CHECK11-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 // CHECK11-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 // CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] -// CHECK11-NEXT: store i32 [[ADD12]], i32* [[I_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[ADD12]], i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK11-NEXT: br i1 [[TMP29]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK11-NEXT: br i1 [[TMP39]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK11: .omp.linear.pu: // CHECK11-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK11: .omp.linear.pu.done: @@ -2571,27 +2743,30 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 // CHECK11-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 // CHECK11-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[G_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [1000 x i32]*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32 [[TMP2]], [1000 x i32]* [[TMP0]], i32* [[TMP3]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store [1000 x i32]* [[TMP0]], [1000 x i32]** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32*, i32** [[G_ADDR]], align 4 +// CHECK11-NEXT: store i32* [[TMP5]], i32** [[TMP4]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK11-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2601,82 +2776,100 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !13 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4, !llvm.access.group !13 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !13 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32*, i32** [[G_ADDR]], align 4, !llvm.access.group !13 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], [1000 x i32]* [[TMP0]], i32* [[TMP18]]), !llvm.access.group !13 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: store i32 [[TMP24]], i32* [[TMP23]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: store i32 [[TMP26]], i32* [[TMP25]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store [1000 x i32]* [[TMP4]], [1000 x i32]** [[TMP27]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP5]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: store i32* [[TMP29]], i32** [[TMP28]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !13 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -2689,15 +2882,14 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], [1000 x i32]* noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* noundef [[G:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK11-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2710,88 +2902,96 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load [1000 x i32]*, [1000 x i32]** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !17 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 4, !llvm.access.group !17 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 0 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !17 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] -// CHECK11-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX6]], align 4, !llvm.access.group !17 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP9]], align 4, !llvm.access.group !17 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i32 0 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !17 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP8]], i32 0, i32 [[TMP28]] +// CHECK11-NEXT: store i32 [[TMP27]], i32* [[ARRAYIDX6]], align 4, !llvm.access.group !17 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP31]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK11-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP34]], 0 // CHECK11-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK11-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp @@ -148,18 +148,21 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -167,58 +170,73 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 56087, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !4 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP18]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: store i32 456, i32* [[J]], align 4 @@ -228,13 +246,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -246,79 +264,85 @@ // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 56087, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 456 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 456 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP18]], 456 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 456 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL5]] // CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] // CHECK1-NEXT: store i32 [[ADD7]], i32* [[J]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], [123 x [456 x i32]]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP20]] to i64 // CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [456 x i32], [456 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX9]], align 4, !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: store i32 456, i32* [[J]], align 4 @@ -383,18 +407,21 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -402,56 +429,71 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 56087, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !5 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !5 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, i32* [[I]], align 4 // CHECK3-NEXT: store i32 456, i32* [[J]], align 4 @@ -461,13 +503,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -479,75 +521,81 @@ // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 56087, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 456 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP18]], 456 // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL4]] // CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK3-NEXT: store i32 [[ADD6]], i32* [[J]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], [123 x [456 x i32]]* [[A]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], [456 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP14]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], [123 x [456 x i32]]* [[A]], i32 0, i32 [[TMP19]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], [456 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP20]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX7]], align 4, !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, i32* [[I]], align 4 // CHECK3-NEXT: store i32 456, i32* [[J]], align 4 @@ -839,8 +887,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[M]], i64* [[M_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -851,151 +898,170 @@ // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 4 -// CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[M_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[M_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP1]], i64* [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32* [[TMP2]], i32** [[TMP9]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I13:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J14:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[M]], i64* [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[M_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[M]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 -// CHECK9-NEXT: [[CONV10:%.*]] = sext i32 [[DIV9]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV7]], [[CONV10]] -// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB11]], i64* [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 // CHECK9-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[CMP12:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: store i64 [[TMP17]], i64* [[DOTOMP_COMB_UB]], align 8 // CHECK9-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK9-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP16]], i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP24]], i64* [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] -// CHECK9-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[CONV17:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP21]], i32* [[CONV17]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV3]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[CONV18:%.*]] = bitcast i64* [[M_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP23]], i32* [[CONV18]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[M_CASTED]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP19]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !5 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: store i64 [[TMP27]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: store i64 [[TMP28]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: store i64 [[TMP30]], i64* [[TMP29]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: store i64 [[TMP32]], i64* [[TMP31]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: store i32 [[TMP34]], i32* [[TMP33]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[M]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: store i32 [[TMP36]], i32* [[TMP35]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i64 [[TMP6]], i64* [[TMP37]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store i64 [[TMP8]], i64* [[TMP38]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: store i32* [[TMP10]], i32** [[TMP39]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !5 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP40:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP41:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP40]], [[TMP41]] // CHECK9-NEXT: store i64 [[ADD]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK9-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP43]]) +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK9-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP31]], 0 -// CHECK9-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 -// CHECK9-NEXT: [[MUL21:%.*]] = mul nsw i32 [[DIV20]], 1 -// CHECK9-NEXT: [[ADD22:%.*]] = add nsw i32 0, [[MUL21]] -// CHECK9-NEXT: store i32 [[ADD22]], i32* [[I13]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB23:%.*]] = sub nsw i32 [[TMP32]], 0 -// CHECK9-NEXT: [[DIV24:%.*]] = sdiv i32 [[SUB23]], 1 -// CHECK9-NEXT: [[MUL25:%.*]] = mul nsw i32 [[DIV24]], 1 -// CHECK9-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK9-NEXT: store i32 [[ADD26]], i32* [[J14]], align 4 +// CHECK9-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP46]], 0 +// CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 +// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] +// CHECK9-NEXT: store i32 [[ADD16]], i32* [[I9]], align 4 +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP47]], 0 +// CHECK9-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 +// CHECK9-NEXT: [[MUL19:%.*]] = mul nsw i32 [[DIV18]], 1 +// CHECK9-NEXT: [[ADD20:%.*]] = add nsw i32 0, [[MUL19]] +// CHECK9-NEXT: store i32 [[ADD20]], i32* [[J10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -1004,172 +1070,178 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I13:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J14:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[M]], i64* [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[M_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: store i32 [[TMP8]], i32* [[M]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32*, i32** [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 -// CHECK9-NEXT: [[CONV10:%.*]] = sext i32 [[DIV9]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV7]], [[CONV10]] -// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB11]], i64* [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP18]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 // CHECK9-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP19]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[CMP12:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP20]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[TMP10]], i64* [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: store i64 [[TMP21]], i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP22]], i64* [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP23]], i64* [[DOTOMP_UB]], align 8 // CHECK9-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP13]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK9-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP28]], [[COND_TRUE]] ], [ [[TMP29]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP18]], i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP30]], i64* [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK9-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP31]], [[TMP32]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP22]], 0 -// CHECK9-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 -// CHECK9-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] -// CHECK9-NEXT: [[CONV20:%.*]] = sext i32 [[MUL19]] to i64 -// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i64 [[TMP21]], [[CONV20]] -// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]] -// CHECK9-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK9-NEXT: store i32 [[CONV23]], i32* [[I13]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK9-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 -// CHECK9-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] -// CHECK9-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 -// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP24]], [[CONV27]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP26]], 0 -// CHECK9-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 -// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] -// CHECK9-NEXT: [[CONV32:%.*]] = sext i32 [[MUL31]] to i64 -// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[DIV28]], [[CONV32]] -// CHECK9-NEXT: [[SUB34:%.*]] = sub nsw i64 [[TMP23]], [[MUL33]] -// CHECK9-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 -// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] -// CHECK9-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 -// CHECK9-NEXT: store i32 [[CONV37]], i32* [[J14]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I13]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[TMP28:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP1]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[TMP28]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[J14]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK9-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i64 [[IDXPROM38]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX39]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK9-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK9-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP33]], [[CONV16]] +// CHECK9-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK9-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK9-NEXT: store i32 [[CONV19]], i32* [[I9]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP35:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK9-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK9-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP36]], [[CONV23]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK9-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] +// CHECK9-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK9-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP35]], [[MUL29]] +// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 +// CHECK9-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] +// CHECK9-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK9-NEXT: store i32 [[CONV33]], i32* [[J10]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[I9]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP39]] to i64 +// CHECK9-NEXT: [[TMP40:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP12]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP14]], i64 [[TMP40]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, i32* [[J10]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[IDXPROM34:%.*]] = sext i32 [[TMP41]] to i64 +// CHECK9-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i64 [[IDXPROM34]] +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX35]], align 4, !llvm.access.group !9 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[ADD40:%.*]] = add nsw i64 [[TMP30]], 1 -// CHECK9-NEXT: store i64 [[ADD40]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 [[TMP42]], 1 +// CHECK9-NEXT: store i64 [[ADD36]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 -// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP44]]) +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK9-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB41:%.*]] = sub nsw i32 [[TMP35]], 0 +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB37:%.*]] = sub nsw i32 [[TMP47]], 0 +// CHECK9-NEXT: [[DIV38:%.*]] = sdiv i32 [[SUB37]], 1 +// CHECK9-NEXT: [[MUL39:%.*]] = mul nsw i32 [[DIV38]], 1 +// CHECK9-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL39]] +// CHECK9-NEXT: store i32 [[ADD40]], i32* [[I9]], align 4 +// CHECK9-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB41:%.*]] = sub nsw i32 [[TMP48]], 0 // CHECK9-NEXT: [[DIV42:%.*]] = sdiv i32 [[SUB41]], 1 // CHECK9-NEXT: [[MUL43:%.*]] = mul nsw i32 [[DIV42]], 1 // CHECK9-NEXT: [[ADD44:%.*]] = add nsw i32 0, [[MUL43]] -// CHECK9-NEXT: store i32 [[ADD44]], i32* [[I13]], align 4 -// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB45:%.*]] = sub nsw i32 [[TMP36]], 0 -// CHECK9-NEXT: [[DIV46:%.*]] = sdiv i32 [[SUB45]], 1 -// CHECK9-NEXT: [[MUL47:%.*]] = mul nsw i32 [[DIV46]], 1 -// CHECK9-NEXT: [[ADD48:%.*]] = add nsw i32 0, [[MUL47]] -// CHECK9-NEXT: store i32 [[ADD48]], i32* [[J14]], align 4 +// CHECK9-NEXT: store i32 [[ADD44]], i32* [[J10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -1213,18 +1285,21 @@ // CHECK9-SAME: ([10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x [2 x i32]]*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [10 x [2 x i32]]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [10 x [2 x i32]]* [[TMP0]], [10 x [2 x i32]]** [[TMP1]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1232,58 +1307,73 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 19, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x [2 x i32]]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x [2 x i32]]* [[TMP0]]), !llvm.access.group !14 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [10 x [2 x i32]]* [[TMP2]], [10 x [2 x i32]]** [[TMP18]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !14 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, i32* [[I]], align 4 // CHECK9-NEXT: store i32 2, i32* [[J]], align 4 @@ -1293,13 +1383,13 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1311,78 +1401,84 @@ // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 19, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 2 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 2 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP18]], 2 // CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 2 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL5]] // CHECK9-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] // CHECK9-NEXT: store i32 [[ADD7]], i32* [[J]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP20]] to i64 // CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX9]], align 4, !llvm.access.group !17 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, i32* [[I]], align 4 // CHECK9-NEXT: store i32 2, i32* [[J]], align 4 @@ -1531,8 +1627,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -1541,147 +1636,172 @@ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[M_CASTED]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[M_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[M_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], i32* [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32* [[TMP2]], i32** [[TMP9]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J12:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J10:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[M]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 // CHECK11-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK11-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP17]], i64* [[DOTOMP_COMB_UB]], align 8 // CHECK11-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP16]], i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP24]], i64* [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] -// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i32 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: store i32 [[TMP23]], i32* [[N_CASTED]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[M_ADDR]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: store i32 [[TMP25]], i32* [[M_CASTED]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[M_CASTED]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP20]], i32 [[TMP22]], i32 [[TMP24]], i32 [[TMP26]], i32 [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !6 +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32 +// CHECK11-NEXT: store i32 [[TMP28]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP30:%.*]] = trunc i64 [[TMP29]] to i32 +// CHECK11-NEXT: store i32 [[TMP30]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: store i32 [[TMP32]], i32* [[TMP31]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: store i32 [[TMP34]], i32* [[TMP33]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: store i32 [[TMP36]], i32* [[TMP35]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[M]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: store i32 [[TMP38]], i32* [[TMP37]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[TMP39]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[TMP40]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store i32* [[TMP10]], i32** [[TMP41]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !6 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP27]], [[TMP28]] +// CHECK11-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP43:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP42]], [[TMP43]] // CHECK11-NEXT: store i64 [[ADD]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP45]]) +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 +// CHECK11-NEXT: br i1 [[TMP47]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP33]], 0 -// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK11-NEXT: [[MUL17:%.*]] = mul nsw i32 [[DIV16]], 1 -// CHECK11-NEXT: [[ADD18:%.*]] = add nsw i32 0, [[MUL17]] -// CHECK11-NEXT: store i32 [[ADD18]], i32* [[I11]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP34]], 0 -// CHECK11-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 -// CHECK11-NEXT: [[MUL21:%.*]] = mul nsw i32 [[DIV20]], 1 -// CHECK11-NEXT: [[ADD22:%.*]] = add nsw i32 0, [[MUL21]] -// CHECK11-NEXT: store i32 [[ADD22]], i32* [[J12]], align 4 +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP48]], 0 +// CHECK11-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK11-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 +// CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] +// CHECK11-NEXT: store i32 [[ADD16]], i32* [[I9]], align 4 +// CHECK11-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP49]], 0 +// CHECK11-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 +// CHECK11-NEXT: [[MUL19:%.*]] = mul nsw i32 [[DIV18]], 1 +// CHECK11-NEXT: [[ADD20:%.*]] = add nsw i32 0, [[MUL19]] +// CHECK11-NEXT: store i32 [[ADD20]], i32* [[J10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: br label [[OMP_PRECOND_END]] @@ -1690,170 +1810,178 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I13:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J14:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J12:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[M]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP18]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 // CHECK11-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP19]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP20]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[CONV11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CONV12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK11-NEXT: store i64 [[CONV11]], i64* [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[CONV12]], i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP21]], i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[CONV9:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[CONV10:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK11-NEXT: store i64 [[CONV9]], i64* [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[CONV10]], i64* [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP13]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// CHECK11-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] +// CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP28]], [[COND_TRUE]] ], [ [[TMP29]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP18]], i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP30]], i64* [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !10 -// CHECK11-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK11-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !10 +// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP31]], [[TMP32]] +// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP22]], 0 -// CHECK11-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 -// CHECK11-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] -// CHECK11-NEXT: [[CONV20:%.*]] = sext i32 [[MUL19]] to i64 -// CHECK11-NEXT: [[DIV21:%.*]] = sdiv i64 [[TMP21]], [[CONV20]] -// CHECK11-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]] -// CHECK11-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK11-NEXT: store i32 [[CONV23]], i32* [[I13]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK11-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 -// CHECK11-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] -// CHECK11-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 -// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP24]], [[CONV27]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP26]], 0 -// CHECK11-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 -// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] -// CHECK11-NEXT: [[CONV32:%.*]] = sext i32 [[MUL31]] to i64 -// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[DIV28]], [[CONV32]] -// CHECK11-NEXT: [[SUB34:%.*]] = sub nsw i64 [[TMP23]], [[MUL33]] -// CHECK11-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 -// CHECK11-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] -// CHECK11-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 -// CHECK11-NEXT: store i32 [[CONV37]], i32* [[J14]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I13]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP28:%.*]] = mul nsw i32 [[TMP27]], [[TMP1]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP28]] -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[J14]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[ARRAYIDX38:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i32 [[TMP29]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX38]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK11-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] +// CHECK11-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 +// CHECK11-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP33]], [[CONV18]] +// CHECK11-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] +// CHECK11-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK11-NEXT: store i32 [[CONV21]], i32* [[I11]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP35:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK11-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 +// CHECK11-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] +// CHECK11-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 +// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP36]], [[CONV25]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 +// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] +// CHECK11-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 +// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] +// CHECK11-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP35]], [[MUL31]] +// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 +// CHECK11-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] +// CHECK11-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 +// CHECK11-NEXT: store i32 [[CONV35]], i32* [[J12]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[I11]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP40:%.*]] = mul nsw i32 [[TMP39]], [[TMP12]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP14]], i32 [[TMP40]] +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[J12]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i32 [[TMP41]] +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX36]], align 4, !llvm.access.group !10 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK11-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP30]], 1 -// CHECK11-NEXT: store i64 [[ADD39]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK11-NEXT: [[ADD37:%.*]] = add nsw i64 [[TMP42]], 1 +// CHECK11-NEXT: store i64 [[ADD37]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 -// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP44]]) +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK11-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB40:%.*]] = sub nsw i32 [[TMP35]], 0 -// CHECK11-NEXT: [[DIV41:%.*]] = sdiv i32 [[SUB40]], 1 -// CHECK11-NEXT: [[MUL42:%.*]] = mul nsw i32 [[DIV41]], 1 -// CHECK11-NEXT: [[ADD43:%.*]] = add nsw i32 0, [[MUL42]] -// CHECK11-NEXT: store i32 [[ADD43]], i32* [[I13]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB44:%.*]] = sub nsw i32 [[TMP36]], 0 -// CHECK11-NEXT: [[DIV45:%.*]] = sdiv i32 [[SUB44]], 1 -// CHECK11-NEXT: [[MUL46:%.*]] = mul nsw i32 [[DIV45]], 1 -// CHECK11-NEXT: [[ADD47:%.*]] = add nsw i32 0, [[MUL46]] -// CHECK11-NEXT: store i32 [[ADD47]], i32* [[J14]], align 4 +// CHECK11-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB38:%.*]] = sub nsw i32 [[TMP47]], 0 +// CHECK11-NEXT: [[DIV39:%.*]] = sdiv i32 [[SUB38]], 1 +// CHECK11-NEXT: [[MUL40:%.*]] = mul nsw i32 [[DIV39]], 1 +// CHECK11-NEXT: [[ADD41:%.*]] = add nsw i32 0, [[MUL40]] +// CHECK11-NEXT: store i32 [[ADD41]], i32* [[I11]], align 4 +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB42:%.*]] = sub nsw i32 [[TMP48]], 0 +// CHECK11-NEXT: [[DIV43:%.*]] = sdiv i32 [[SUB42]], 1 +// CHECK11-NEXT: [[MUL44:%.*]] = mul nsw i32 [[DIV43]], 1 +// CHECK11-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL44]] +// CHECK11-NEXT: store i32 [[ADD45]], i32* [[J12]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: br label [[OMP_PRECOND_END]] @@ -1897,18 +2025,21 @@ // CHECK11-SAME: ([10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x [2 x i32]]*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [10 x [2 x i32]]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [10 x [2 x i32]]* [[TMP0]], [10 x [2 x i32]]** [[TMP1]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1916,56 +2047,71 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 19, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x [2 x i32]]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x [2 x i32]]* [[TMP0]]), !llvm.access.group !15 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [10 x [2 x i32]]* [[TMP2]], [10 x [2 x i32]]** [[TMP16]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !15 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK11-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, i32* [[I]], align 4 // CHECK11-NEXT: store i32 2, i32* [[J]], align 4 @@ -1975,13 +2121,13 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1993,74 +2139,80 @@ // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 19, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 2 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP18]], 2 // CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL4]] // CHECK11-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK11-NEXT: store i32 [[ADD6]], i32* [[J]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP14]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP6]], i32 0, i32 [[TMP19]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP20]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX7]], align 4, !llvm.access.group !18 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, i32* [[I]], align 4 // CHECK11-NEXT: store i32 2, i32* [[J]], align 4 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp @@ -227,75 +227,93 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !6 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !6 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !6 +// CHECK1-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP18]], align 8, !llvm.access.group !6 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !6 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -304,13 +322,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -320,67 +338,73 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -392,75 +416,93 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !15 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP18]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -469,13 +511,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -485,67 +527,73 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -557,95 +605,113 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 123 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], %struct.SS* [[TMP0]]), !llvm.access.group !21 +// CHECK1-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i64 [[TMP12]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: store i64 [[TMP14]], i64* [[TMP13]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: store i64 [[TMP16]], i64* [[TMP15]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP17]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !21 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], 122 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 122 // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK1: cond.true5: // CHECK1-NEXT: br label [[COND_END7:%.*]] // CHECK1: cond.false6: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 // CHECK1-NEXT: br label [[COND_END7]] // CHECK1: cond.end7: -// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP18]], [[COND_FALSE6]] ] +// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK1-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -654,13 +720,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -670,67 +736,73 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -838,73 +910,91 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !7 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !7 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !7 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !7 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -913,13 +1003,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -929,64 +1019,70 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -998,73 +1094,91 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !16 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !16 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1073,13 +1187,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1089,64 +1203,70 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1158,93 +1278,111 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 123 // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]], %struct.SS* [[TMP0]]), !llvm.access.group !22 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[TMP13]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP15]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !22 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], 122 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 122 // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK3: cond.true5: // CHECK3-NEXT: br label [[COND_END7:%.*]] // CHECK3: cond.false6: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK3-NEXT: br label [[COND_END7]] // CHECK3: cond.end7: -// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP16]], [[COND_FALSE6]] ] +// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK3-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1253,13 +1391,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1269,64 +1407,70 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1826,29 +1970,31 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1858,90 +2004,106 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !9 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: store i64 [[TMP25]], i64* [[TMP24]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: store i64 [[TMP27]], i64* [[TMP26]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[TMP30]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32* [[TMP6]], i32** [[TMP31]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !9 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 -// CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD9]], i32* [[I3]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[I3]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -1950,15 +2112,14 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1968,97 +2129,104 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 +// CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK9-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] +// CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK9-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 -// CHECK9-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] -// CHECK9-NEXT: store i32 [[ADD12]], i32* [[I5]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 +// CHECK9-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 +// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] +// CHECK9-NEXT: store i32 [[ADD11]], i32* [[I4]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -2072,29 +2240,31 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2104,90 +2274,106 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 -// CHECK9-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !18 -// CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !18 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !18 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !18 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !18 +// CHECK9-NEXT: store i64 [[TMP25]], i64* [[TMP24]], align 8, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !18 +// CHECK9-NEXT: store i64 [[TMP27]], i64* [[TMP26]], align 8, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 8, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[TMP30]], align 8, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32* [[TMP6]], i32** [[TMP31]], align 8, !llvm.access.group !18 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !18 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 -// CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD9]], i32* [[I3]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[I3]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -2196,15 +2382,14 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2214,97 +2399,104 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 +// CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK9-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] +// CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !21 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !21 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK9-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 -// CHECK9-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] -// CHECK9-NEXT: store i32 [[ADD12]], i32* [[I5]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 +// CHECK9-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 +// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] +// CHECK9-NEXT: store i32 [[ADD11]], i32* [[I4]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -2319,8 +2511,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 @@ -2329,152 +2520,168 @@ // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK9-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK9-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] +// CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !24 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]]), !llvm.access.group !24 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK9-NEXT: store i64 [[TMP24]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: store i64 [[TMP28]], i64* [[TMP27]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: store i64 [[TMP30]], i64* [[TMP29]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: store i32 [[TMP32]], i32* [[TMP31]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[TMP33]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store i32* [[TMP6]], i32** [[TMP34]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: store i32 [[TMP36]], i32* [[TMP35]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !24 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK9-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] -// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE14:%.*]], label [[COND_FALSE15:%.*]] -// CHECK9: cond.true14: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: br label [[COND_END16:%.*]] -// CHECK9: cond.false15: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: br label [[COND_END16]] -// CHECK9: cond.end16: -// CHECK9-NEXT: [[COND17:%.*]] = phi i32 [ [[TMP32]], [[COND_TRUE14]] ], [ [[TMP33]], [[COND_FALSE15]] ] -// CHECK9-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: store i32 [[TMP34]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] +// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP43]], [[TMP44]] +// CHECK9-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] +// CHECK9: cond.true11: +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: br label [[COND_END13:%.*]] +// CHECK9: cond.false12: +// CHECK9-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: br label [[COND_END13]] +// CHECK9: cond.end13: +// CHECK9-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP45]], [[COND_TRUE11]] ], [ [[TMP46]], [[COND_FALSE12]] ] +// CHECK9-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: store i32 [[TMP47]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) -// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 -// CHECK9-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP48:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP49:%.*]] = load i32, i32* [[TMP48]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP49]]) +// CHECK9-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0 +// CHECK9-NEXT: br i1 [[TMP51]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[SUB18:%.*]] = sub nsw i32 [[TMP39]], 0 -// CHECK9-NEXT: [[DIV19:%.*]] = sdiv i32 [[SUB18]], 1 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV19]], 1 -// CHECK9-NEXT: [[ADD20:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD20]], i32* [[I5]], align 4 +// CHECK9-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP52]], 0 +// CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 +// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] +// CHECK9-NEXT: store i32 [[ADD17]], i32* [[I4]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -2483,121 +2690,128 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I7:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 8 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK9-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK9-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK9-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 +// CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I7]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[I7]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK9-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1 -// CHECK9-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1 -// CHECK9-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] -// CHECK9-NEXT: store i32 [[ADD14]], i32* [[I7]], align 4 -// CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK9: .omp.final.done: -// CHECK9-NEXT: br label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP35]], 0 +// CHECK9-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 +// CHECK9-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK9-NEXT: store i32 [[ADD12]], i32* [[I5]], align 4 +// CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK9: .omp.final.done: +// CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // @@ -2699,75 +2913,93 @@ // CHECK9-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !30 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !30 +// CHECK9-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !30 +// CHECK9-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 8, !llvm.access.group !30 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !30 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2776,13 +3008,13 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2792,66 +3024,72 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !33 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2863,75 +3101,93 @@ // CHECK9-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !36 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !36 +// CHECK9-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !36 +// CHECK9-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 8, !llvm.access.group !36 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !36 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2940,13 +3196,13 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2956,66 +3212,72 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !39 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !39 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3028,111 +3290,126 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK9-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP5]]) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP11]], 10 // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !42 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !42 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK9-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: store i64 [[TMP19]], i64* [[TMP18]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP20]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.12*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !42 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP20]], 9 -// CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] -// CHECK9: cond.true6: -// CHECK9-NEXT: br label [[COND_END8:%.*]] -// CHECK9: cond.false7: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: br label [[COND_END8]] -// CHECK9: cond.end8: -// CHECK9-NEXT: [[COND9:%.*]] = phi i32 [ 9, [[COND_TRUE6]] ], [ [[TMP21]], [[COND_FALSE7]] ] -// CHECK9-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP29]], 9 +// CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK9: cond.true5: +// CHECK9-NEXT: br label [[COND_END7:%.*]] +// CHECK9: cond.false6: +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: br label [[COND_END7]] +// CHECK9: cond.end7: +// CHECK9-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP30]], [[COND_FALSE6]] ] +// CHECK9-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK9-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3141,14 +3418,14 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3158,68 +3435,75 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK9-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !45 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3470,27 +3754,30 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3500,82 +3787,100 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !10 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[TMP22]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[TMP28]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32* [[TMP6]], i32** [[TMP29]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !10 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -3588,15 +3893,14 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3609,86 +3913,94 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP27]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -3706,27 +4018,30 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3736,82 +4051,100 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !19 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[TMP22]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[TMP28]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32* [[TMP6]], i32** [[TMP29]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !19 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -3824,15 +4157,14 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3845,86 +4177,94 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP27]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -3943,33 +4283,35 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -3979,112 +4321,131 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: store i32 [[TMP18]], i32* [[N_CASTED]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]]), !llvm.access.group !25 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: store i32 [[TMP24]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: store i32 [[TMP26]], i32* [[TMP25]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: store i32 [[TMP28]], i32* [[TMP27]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: store i32 [[TMP30]], i32* [[TMP29]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[TMP31]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32* [[TMP6]], i32** [[TMP32]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: store i32 [[TMP34]], i32* [[TMP33]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !25 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP28]], [[TMP29]] +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK11-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK11: cond.true11: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 // CHECK11-NEXT: br label [[COND_END13:%.*]] // CHECK11: cond.false12: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK11-NEXT: br label [[COND_END13]] // CHECK11: cond.end13: -// CHECK11-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP30]], [[COND_TRUE11]] ], [ [[TMP31]], [[COND_FALSE12]] ] +// CHECK11-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE11]] ], [ [[TMP44]], [[COND_FALSE12]] ] // CHECK11-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: store i32 [[TMP32]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: store i32 [[TMP45]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) -// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK11-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]]) +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 +// CHECK11-NEXT: br i1 [[TMP49]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK11-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP50]], 0 // CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] @@ -4097,16 +4458,15 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -4119,87 +4479,97 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !28 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP29]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK11-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK11-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -4307,73 +4677,91 @@ // CHECK11-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !31 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !31 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK11-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4382,13 +4770,13 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4398,63 +4786,69 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !34 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !34 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !34 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP17]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !34 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4466,73 +4860,91 @@ // CHECK11-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !37 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !37 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !37 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !37 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK11-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4541,13 +4953,13 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4557,63 +4969,69 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !40 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !40 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !40 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !40 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !40 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP17]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !40 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4626,105 +5044,123 @@ // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP5]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP11]], 10 // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !43 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[TMP16]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[TMP19]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !43 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP27]], 9 // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK11: cond.true5: // CHECK11-NEXT: br label [[COND_END7:%.*]] // CHECK11: cond.false6: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 // CHECK11-NEXT: br label [[COND_END7]] // CHECK11: cond.end7: -// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ] +// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP28]], [[COND_FALSE6]] ] // CHECK11-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: store i32 [[TMP29]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4733,14 +5169,14 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4750,64 +5186,72 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !46 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !46 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !46 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP19]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !46 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp @@ -392,8 +392,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 @@ -404,149 +403,168 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i64 [[TMP4]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP2]], i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S]* [[TMP6]] to %struct.S* +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP8]], %struct.St* noundef [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !5 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], [2 x i32]* [[VEC2]], i64 [[TMP19]], [2 x %struct.S]* [[S_ARR3]], %struct.S* [[VAR5]], i64 [[TMP21]]), !llvm.access.group !5 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: store i64 [[TMP27]], i64* [[TMP26]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: store i64 [[TMP29]], i64* [[TMP28]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP30]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: store i32 [[TMP32]], i32* [[TMP31]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP33]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[TMP34]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: store i32 [[TMP36]], i32* [[TMP35]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK1-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP40]]) +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK1-NEXT: br i1 [[TMP42]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN4]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP28]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP43]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done11: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done5: // CHECK1-NEXT: ret void // // @@ -584,151 +602,157 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP8:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 8 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP16]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP17]], i8* align 4 [[TMP18]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP19:%.*]] = bitcast [2 x %struct.S]* [[TMP10]] to %struct.S* +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP20]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP19]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done6: -// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP8]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR7]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* noundef [[AGG_TMP8]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP8]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP20]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP12]], %struct.St* noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP22]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP23]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK1-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX11]] to i8* -// CHECK1-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR7]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false), !llvm.access.group !9 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP29]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[ARRAYIDX6]] to i8* +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i64 4, i1 false), !llvm.access.group !9 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP34]] +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[SIVAR]], align 4, !llvm.access.group !9 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK1-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN14]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN9]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP29]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP41]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done15: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done10: // CHECK1-NEXT: ret void // // @@ -736,33 +760,33 @@ // CHECK1-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.2*, align 8 // CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x i8*], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x i8*], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x i8*], align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) -// CHECK1-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP1]], %struct.S.0** [[TMP]], align 8 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) +// CHECK1-NEXT: store %struct.S.2* [[TEST]], %struct.S.2** [[VAR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S.2*, %struct.S.2** [[VAR]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[TMP1]], %struct.S.2** [[TMP]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_VAR]], align 4 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP2]], i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8** [[TMP5]] to [2 x i32]** // CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP6]], align 8 @@ -780,19 +804,19 @@ // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 // CHECK1-NEXT: store i8* null, i8** [[TMP14]], align 8 // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to [2 x %struct.S.0]** -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to [2 x %struct.S.2]** +// CHECK1-NEXT: store [2 x %struct.S.2]* [[S_ARR]], [2 x %struct.S.2]** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.0]** -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.2]** +// CHECK1-NEXT: store [2 x %struct.S.2]* [[S_ARR]], [2 x %struct.S.2]** [[TMP18]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 // CHECK1-NEXT: store i8* null, i8** [[TMP19]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to %struct.S.0** -// CHECK1-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to %struct.S.2** +// CHECK1-NEXT: store %struct.S.2* [[TMP4]], %struct.S.2** [[TMP21]], align 8 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.0** -// CHECK1-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.2** +// CHECK1-NEXT: store %struct.S.2* [[TMP4]], %struct.S.2** [[TMP23]], align 8 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 // CHECK1-NEXT: store i8* null, i8** [[TMP24]], align 8 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 @@ -802,21 +826,21 @@ // CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 // CHECK1-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81([2 x i32]* [[VEC]], i64 [[TMP3]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP4]]) #[[ATTR2]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81([2 x i32]* [[VEC]], i64 [[TMP3]], [2 x %struct.S.2]* [[S_ARR]], %struct.S.2* [[TMP4]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP29]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP29]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done2: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK1-NEXT: ret i32 [[TMP30]] // @@ -864,379 +888,405 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) +// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81 -// CHECK1-SAME: ([2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: ([2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.2]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.2]*, align 8 +// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store [2 x %struct.S.2]* [[S_ARR]], [2 x %struct.S.2]** [[S_ARR_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[VAR]], %struct.S.2** [[VAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i64 [[TMP4]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP5]]) +// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.2]*, [2 x %struct.S.2]** [[S_ARR_ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.2*, %struct.S.2** [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[TMP2]], %struct.S.2** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x %struct.S.2]* [[TMP1]], [2 x %struct.S.2]** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[TMP8]], %struct.S.2** [[TMP7]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca %struct.S.2*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.2]*, [2 x %struct.S.2]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP7]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[TMP8]], %struct.S.2** [[TMP]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP9]], i8* align 4 [[TMP10]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [2 x %struct.S.2]* [[TMP6]] to %struct.S.2* +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.2* [[ARRAY_BEGIN]], [[TMP12]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP11]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.2* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.St* noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.2* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: [[TMP13:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[TMP13]], %struct.St* noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: store %struct.S.2* [[VAR]], %struct.S.2** [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP21:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8, !llvm.access.group !14 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], [2 x i32]* [[VEC2]], i64 [[TMP20]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP21]]), !llvm.access.group !14 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK1-NEXT: store i64 [[TMP22]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK1-NEXT: store i64 [[TMP24]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: store i64 [[TMP26]], i64* [[TMP25]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: store i64 [[TMP28]], i64* [[TMP27]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP29]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: store i32 [[TMP31]], i32* [[TMP30]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store [2 x %struct.S.2]* [[S_ARR]], [2 x %struct.S.2]** [[TMP32]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP34:%.*]] = load %struct.S.2*, %struct.S.2** [[_TMP4]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: store %struct.S.2* [[TMP34]], %struct.S.2** [[TMP33]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !14 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK1-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK1-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN10]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN6]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP28]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done11: +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP41]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done7: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1ERKS0_2St -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC2ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP0]], %struct.St* noundef [[T]]) +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[S]], %struct.S.2** [[S_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S.2*, %struct.S.2** [[S_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC2ERKS0_2St(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[TMP0]], %struct.St* noundef [[T]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP8:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP9:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP4:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP5:%.*]] = alloca %struct.S.2*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load [2 x %struct.S.2]*, [2 x %struct.S.2]** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP11]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[TMP12]], %struct.S.2** [[TMP]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP15]], i8* align 4 [[TMP16]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast [2 x %struct.S.2]* [[TMP10]] to %struct.S.2* +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.2* [[ARRAY_BEGIN]], [[TMP18]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP17]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.2* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done6: -// CHECK1-NEXT: [[TMP9:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP8]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR7]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP9]], %struct.St* noundef [[AGG_TMP8]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP8]]) #[[ATTR2]] -// CHECK1-NEXT: store %struct.S.0* [[VAR7]], %struct.S.0** [[_TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.2* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP18]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done3: +// CHECK1-NEXT: [[TMP19:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[TMP19]], %struct.St* noundef [[AGG_TMP4]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) #[[ATTR2]] +// CHECK1-NEXT: store %struct.S.2* [[VAR]], %struct.S.2** [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP21]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP22]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP9]], align 8, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i64 0, i64 [[IDXPROM11]] -// CHECK1-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX12]] to i8* -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false), !llvm.access.group !17 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP28]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP30:%.*]] = load %struct.S.2*, %struct.S.2** [[_TMP5]], align 8, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i64 0, i64 [[IDXPROM7]] +// CHECK1-NEXT: [[TMP32:%.*]] = bitcast %struct.S.2* [[ARRAYIDX8]] to i8* +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast %struct.S.2* [[TMP30]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i64 4, i1 false), !llvm.access.group !17 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP34]], 1 +// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK1-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN14]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN10]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP29]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done15: +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP39]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done11: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], %struct.S.2* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load volatile i32, i32* @g, align 4 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], %struct.S.2* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load volatile i32, i32* @g, align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1245,16 +1295,16 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2ERKS0_2St -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 -// CHECK1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[S]], %struct.S.2** [[S_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], %struct.S.2* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S.2*, %struct.S.2** [[S_ADDR]], align 8 +// CHECK1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[TMP0]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[F2]], align 4 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.St* [[T]], i32 0, i32 0 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 @@ -1264,11 +1314,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -1480,8 +1530,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 @@ -1490,141 +1539,166 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32 [[TMP4]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP2]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC1]] to i8* -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK3-NEXT: [[TMP12:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S]* [[TMP6]] to %struct.S* +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP8]], %struct.St* noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: store i32 [[TMP18]], i32* [[SIVAR_CASTED]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], [2 x i32]* [[VEC1]], i32 [[TMP17]], [2 x %struct.S]* [[S_ARR2]], %struct.S* [[VAR4]], i32 [[TMP19]]), !llvm.access.group !6 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP28]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[TMP30]], i32* [[TMP29]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP31]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store %struct.S* [[VAR]], %struct.S** [[TMP32]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[TMP34]], i32* [[TMP33]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK3-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK3-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN4]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP26]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP41]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done8: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done5: // CHECK3-NEXT: ret void // // @@ -1662,145 +1736,153 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC1]] to i8* -// CHECK3-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: [[TMP17:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK3-NEXT: [[TMP18:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP17]], i8* align 4 [[TMP18]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP19:%.*]] = bitcast [2 x %struct.S]* [[TMP10]] to %struct.S* +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP20]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP19]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP20]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP12]], %struct.St* noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP22]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP23]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC1]], i32 0, i32 [[TMP18]] -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 [[TMP19]] -// CHECK3-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* -// CHECK3-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR4]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false), !llvm.access.group !10 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP30]] +// CHECK3-NEXT: store i32 [[TMP29]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP31]] +// CHECK3-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* +// CHECK3-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i32 4, i1 false), !llvm.access.group !10 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP35]], [[TMP34]] +// CHECK3-NEXT: store i32 [[ADD5]], i32* [[SIVAR]], align 4, !llvm.access.group !10 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP36]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK3-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK3-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP29]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP41]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done11: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done8: // CHECK3-NEXT: ret void // // @@ -1808,32 +1890,32 @@ // CHECK3-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x i8*], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x i8*], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x i8*], align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK3-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i32 8, i1 false) -// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i32 1 -// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) -// CHECK3-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[TMP1]], %struct.S.0** [[TMP]], align 4 +// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i32 1 +// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK3-NEXT: store %struct.S.1* [[TEST]], %struct.S.1** [[VAR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[TMP1]], %struct.S.1** [[TMP]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_VAR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[T_VAR_CASTED]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8** [[TMP5]] to [2 x i32]** // CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP6]], align 4 @@ -1851,19 +1933,19 @@ // CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 // CHECK3-NEXT: store i8* null, i8** [[TMP14]], align 4 // CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to [2 x %struct.S.0]** -// CHECK3-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to [2 x %struct.S.1]** +// CHECK3-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP16]], align 4 // CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.0]** -// CHECK3-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP18]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.1]** +// CHECK3-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP18]], align 4 // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 // CHECK3-NEXT: store i8* null, i8** [[TMP19]], align 4 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to %struct.S.0** -// CHECK3-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to %struct.S.1** +// CHECK3-NEXT: store %struct.S.1* [[TMP4]], %struct.S.1** [[TMP21]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.0** -// CHECK3-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.1** +// CHECK3-NEXT: store %struct.S.1* [[TMP4]], %struct.S.1** [[TMP23]], align 4 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 // CHECK3-NEXT: store i8* null, i8** [[TMP24]], align 4 // CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 @@ -1873,21 +1955,21 @@ // CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 // CHECK3-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81([2 x i32]* [[VEC]], i32 [[TMP3]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP4]]) #[[ATTR2]] +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81([2 x i32]* [[VEC]], i32 [[TMP3]], [2 x %struct.S.1]* [[S_ARR]], %struct.S.1* [[TMP4]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP29]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP29]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK3: arraydestroy.done2: -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK3-NEXT: ret i32 [[TMP30]] // @@ -1935,368 +2017,398 @@ // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK3-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81 -// CHECK3-SAME: ([2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: ([2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.1]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.1]*, align 4 +// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[S_ARR_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[VAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32 [[TMP4]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP5]]) +// CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[S_ARR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[TMP2]], %struct.S.1** [[TMP]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [2 x %struct.S.1]* [[TMP1]], [2 x %struct.S.1]** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[TMP8]], %struct.S.1** [[TMP7]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP7]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[TMP8]], %struct.S.1** [[TMP]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: [[TMP9:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK3-NEXT: [[TMP10:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 [[TMP10]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = bitcast [2 x %struct.S.1]* [[TMP6]] to %struct.S.1* +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN]], [[TMP12]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP11]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.St* noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: [[TMP13:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP13]], %struct.St* noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP19:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], [2 x i32]* [[VEC2]], i32 [[TMP18]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP19]]), !llvm.access.group !15 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: store i32 [[TMP24]], i32* [[TMP23]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: store i32 [[TMP26]], i32* [[TMP25]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP27]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP30]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP32:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP4]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: store %struct.S.1* [[TMP32]], %struct.S.1** [[TMP31]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !15 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK3-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK3-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN9]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP26]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done10: +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP39]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done7: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC1ERKS0_2St -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 4 -// CHECK3-NEXT: call void @_ZN1SIiEC2ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP0]], %struct.St* noundef [[T]]) +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[S]], %struct.S.1** [[S_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.S.1*, %struct.S.1** [[S_ADDR]], align 4 +// CHECK3-NEXT: call void @_ZN1SIiEC2ERKS0_2St(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP0]], %struct.St* noundef [[T]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP11]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[TMP12]], %struct.S.1** [[TMP]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK3-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK3-NEXT: [[TMP16:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP15]], i8* align 4 [[TMP16]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = bitcast [2 x %struct.S.1]* [[TMP10]] to %struct.S.1* +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN]], [[TMP18]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP17]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: [[TMP9:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP9]], %struct.St* noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP18]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: [[TMP19:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP19]], %struct.St* noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP21]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP22]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP19]] -// CHECK3-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX9]] to i8* -// CHECK3-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false), !llvm.access.group !18 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP29]] +// CHECK3-NEXT: store i32 [[TMP28]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP30:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP4]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 [[TMP31]] +// CHECK3-NEXT: [[TMP32:%.*]] = bitcast %struct.S.1* [[ARRAYIDX6]] to i8* +// CHECK3-NEXT: [[TMP33:%.*]] = bitcast %struct.S.1* [[TMP30]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i32 4, i1 false), !llvm.access.group !18 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], 1 +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK3-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK3-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN11]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN8]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP29]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done12: +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP39]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done9: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load volatile i32, i32* @g, align 4 // CHECK3-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load volatile i32, i32* @g, align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -2305,16 +2417,16 @@ // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC2ERKS0_2St -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 4 -// CHECK3-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[S]], %struct.S.1** [[S_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.S.1*, %struct.S.1** [[S_ADDR]], align 4 +// CHECK3-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[TMP0]], i32 0, i32 0 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[F2]], align 4 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.St* [[T]], i32 0, i32 0 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 @@ -2324,11 +2436,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -2478,9 +2590,7 @@ // CHECK5-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK5-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 // CHECK5-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 // CHECK5-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 @@ -2488,110 +2598,116 @@ // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK5-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load volatile i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK5-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* @g, align 4 +// CHECK5-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 -// CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]]) +// CHECK5-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK5-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[G_ADDR]] to i32* -// CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* -// CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK5-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 +// CHECK5-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[G]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: store i32 [[TMP4]], i32* [[G1]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK5-NEXT: store i32 [[TMP6]], i32* [[SIVAR]], align 4 +// CHECK5-NEXT: store i32* [[G1]], i32** [[TMP]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 -// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] -// CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !4 -// CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[G_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4, !llvm.access.group !4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8, !llvm.access.group !4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !4 -// CHECK5-NEXT: [[TMP14:%.*]] = load volatile i32, i32* [[TMP13]], align 4, !llvm.access.group !4 -// CHECK5-NEXT: [[CONV6:%.*]] = bitcast i64* [[G1_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4, !llvm.access.group !4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8, !llvm.access.group !4 -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 4, !llvm.access.group !4 -// CHECK5-NEXT: [[CONV7:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4, !llvm.access.group !4 -// CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !4 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]), !llvm.access.group !4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK5-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK5-NEXT: store i64 [[TMP17]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !4 +// CHECK5-NEXT: store i64 [[TMP19]], i64* [[TMP18]], align 8, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !4 +// CHECK5-NEXT: store i64 [[TMP21]], i64* [[TMP20]], align 8, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[G]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: store i32 [[TMP23]], i32* [[TMP22]], align 8, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* @g, align 4, !llvm.access.group !4 +// CHECK5-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 8, !llvm.access.group !4 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK5-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK5-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 2, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2600,99 +2716,109 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[G_ADDR]] to i32* -// CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* -// CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK5-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 +// CHECK5-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK5-NEXT: store i32 [[TMP6]], i32* [[G]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: store i32 [[TMP8]], i32* [[G1]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 8 +// CHECK5-NEXT: store i32 [[TMP10]], i32* [[SIVAR]], align 4 +// CHECK5-NEXT: store i32* [[G1]], i32** [[TMP]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP11]] to i32 +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 +// CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: store i32 1, i32* [[CONV]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !8 -// CHECK5-NEXT: store volatile i32 1, i32* [[TMP10]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: store i32 2, i32* [[CONV2]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK5-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !8 -// CHECK5-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK5-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8, !llvm.access.group !8 -// CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group !8 +// CHECK5-NEXT: store i32 1, i32* [[G]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: store volatile i32 1, i32* [[TMP21]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: store i32 2, i32* [[SIVAR]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK5-NEXT: store i32* [[G]], i32** [[TMP22]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: store i32* [[TMP24]], i32** [[TMP23]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 +// CHECK5-NEXT: store i32* [[SIVAR]], i32** [[TMP25]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group !8 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK5-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]]) +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK5-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 2, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3506,8 +3632,7 @@ // CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 // CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 // CHECK13-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK13-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK13-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK13-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 @@ -3518,149 +3643,168 @@ // CHECK13-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK13-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK13-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i64 [[TMP4]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP2]], i64 [[TMP6]]) +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP6]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK13-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK13-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK13-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK13-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK13-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK13-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK13-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK13-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* +// CHECK13-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK13-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 8 +// CHECK13-NEXT: store i32 [[TMP10]], i32* [[SIVAR]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK13-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i64 8, i1 false) -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 -// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK13-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK13-NEXT: [[TMP12:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 8, i1 false) +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S]* [[TMP6]] to %struct.S* +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK13: omp.arraycpy.body: -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4:[0-9]+]] // CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5:[0-9]+]] // CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK13: omp.arraycpy.done4: -// CHECK13-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* noundef [[AGG_TMP6]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR5]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK13: omp.arraycpy.done1: +// CHECK13-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP8]], %struct.St* noundef [[AGG_TMP2]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR5]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[CONV9:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !6 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], [2 x i32]* [[VEC2]], i64 [[TMP19]], [2 x %struct.S]* [[S_ARR3]], %struct.S* [[VAR5]], i64 [[TMP21]]), !llvm.access.group !6 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK13-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !6 +// CHECK13-NEXT: store i64 [[TMP27]], i64* [[TMP26]], align 8, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !6 +// CHECK13-NEXT: store i64 [[TMP29]], i64* [[TMP28]], align 8, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP30]], align 8, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: store i32 [[TMP32]], i32* [[TMP31]], align 8, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP33]], align 8, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK13-NEXT: store %struct.S* [[VAR]], %struct.S** [[TMP34]], align 8, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: store i32 [[TMP36]], i32* [[TMP35]], align 8, !llvm.access.group !6 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !6 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK13-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK13-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP40]]) +// CHECK13-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK13-NEXT: br i1 [[TMP42]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 2, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: -// CHECK13-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i64 2 +// CHECK13-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN4]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP28]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP43]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK13: arraydestroy.done11: +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK13: arraydestroy.done5: // CHECK13-NEXT: ret void // // @@ -3698,151 +3842,157 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK13-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK13-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK13-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK13-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK13-NEXT: [[AGG_TMP8:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK13-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK13-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK13-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* +// CHECK13-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP10:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK13-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[TMP11]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 7 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 8 +// CHECK13-NEXT: store i32 [[TMP14]], i32* [[SIVAR]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK13-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP16]] to i32 +// CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK13-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i64 8, i1 false) -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK13-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 -// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK13-NEXT: [[TMP17:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK13-NEXT: [[TMP18:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP17]], i8* align 4 [[TMP18]], i64 8, i1 false) +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP19:%.*]] = bitcast [2 x %struct.S]* [[TMP10]] to %struct.S* +// CHECK13-NEXT: [[TMP20:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP20]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK13: omp.arraycpy.body: -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP19]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] // CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK13: omp.arraycpy.done6: -// CHECK13-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP8]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR7]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* noundef [[AGG_TMP8]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP8]]) #[[ATTR5]] -// CHECK13-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP20]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK13: omp.arraycpy.done2: +// CHECK13-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP12]], %struct.St* noundef [[AGG_TMP3]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR5]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP22]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP23]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK13-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK13-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK13-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !10 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK13-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK13-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX11]] to i8* -// CHECK13-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR7]] to i8* -// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false), !llvm.access.group !10 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !10 -// CHECK13-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK13-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: store i32 [[TMP29]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK13-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK13-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[ARRAYIDX6]] to i8* +// CHECK13-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i64 4, i1 false), !llvm.access.group !10 +// CHECK13-NEXT: [[TMP34:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP34]] +// CHECK13-NEXT: store i32 [[ADD7]], i32* [[SIVAR]], align 4, !llvm.access.group !10 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK13-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK13-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], 1 +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK13-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) +// CHECK13-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK13-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 2, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: -// CHECK13-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN14]], i64 2 +// CHECK13-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN9]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP29]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP41]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK13: arraydestroy.done15: +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK13: arraydestroy.done10: // CHECK13-NEXT: ret void // // @@ -3857,332 +4007,358 @@ // // // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81 -// CHECK13-SAME: ([2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: ([2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.2]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 // CHECK13-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK13-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 -// CHECK13-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.2]*, align 8 +// CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK13-NEXT: [[TMP:%.*]] = alloca %struct.S.2*, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK13-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK13-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK13-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 +// CHECK13-NEXT: store [2 x %struct.S.2]* [[S_ARR]], [2 x %struct.S.2]** [[S_ARR_ADDR]], align 8 +// CHECK13-NEXT: store %struct.S.2* [[VAR]], %struct.S.2** [[VAR_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK13-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK13-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i64 [[TMP4]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP5]]) +// CHECK13-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.2]*, [2 x %struct.S.2]** [[S_ARR_ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load %struct.S.2*, %struct.S.2** [[VAR_ADDR]], align 8 +// CHECK13-NEXT: store %struct.S.2* [[TMP2]], %struct.S.2** [[TMP]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store [2 x %struct.S.2]* [[TMP1]], [2 x %struct.S.2]** [[TMP6]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP]], align 8 +// CHECK13-NEXT: store %struct.S.2* [[TMP8]], %struct.S.2** [[TMP7]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK13-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK13-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[TMP:%.*]] = alloca %struct.S.2*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK13-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK13-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK13-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK13-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 8 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK13-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK13-NEXT: [[_TMP4:%.*]] = alloca %struct.S.2*, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK13-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK13-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK13-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 +// CHECK13-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK13-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.2]*, [2 x %struct.S.2]** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP7]], align 8 +// CHECK13-NEXT: store %struct.S.2* [[TMP8]], %struct.S.2** [[TMP]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK13-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i64 8, i1 false) -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 -// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK13-NEXT: [[TMP9:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK13-NEXT: [[TMP10:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP9]], i8* align 4 [[TMP10]], i64 8, i1 false) +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP11:%.*]] = bitcast [2 x %struct.S.2]* [[TMP6]] to %struct.S.2* +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.2* [[ARRAY_BEGIN]], [[TMP12]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK13: omp.arraycpy.body: -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK13-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP11]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK13-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.2* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] -// CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK13: omp.arraycpy.done4: -// CHECK13-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK13-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.St* noundef [[AGG_TMP6]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR5]] -// CHECK13-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP7]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.2* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK13: omp.arraycpy.done2: +// CHECK13-NEXT: [[TMP13:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP]], align 8 +// CHECK13-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[TMP13]], %struct.St* noundef [[AGG_TMP3]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR5]] +// CHECK13-NEXT: store %struct.S.2* [[VAR]], %struct.S.2** [[_TMP4]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 -// CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !15 -// CHECK13-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4, !llvm.access.group !15 -// CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !15 -// CHECK13-NEXT: [[TMP21:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8, !llvm.access.group !15 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], [2 x i32]* [[VEC2]], i64 [[TMP20]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP21]]), !llvm.access.group !15 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK13-NEXT: store i64 [[TMP22]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK13-NEXT: store i64 [[TMP24]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !15 +// CHECK13-NEXT: store i64 [[TMP26]], i64* [[TMP25]], align 8, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !15 +// CHECK13-NEXT: store i64 [[TMP28]], i64* [[TMP27]], align 8, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP29]], align 8, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: store i32 [[TMP31]], i32* [[TMP30]], align 8, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store [2 x %struct.S.2]* [[S_ARR]], [2 x %struct.S.2]** [[TMP32]], align 8, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP34:%.*]] = load %struct.S.2*, %struct.S.2** [[_TMP4]], align 8, !llvm.access.group !15 +// CHECK13-NEXT: store %struct.S.2* [[TMP34]], %struct.S.2** [[TMP33]], align 8, !llvm.access.group !15 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !15 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK13-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) +// CHECK13-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK13-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 2, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: -// CHECK13-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN10]], i64 2 +// CHECK13-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN6]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP28]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK13-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK13: arraydestroy.done11: +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP41]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK13-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK13: arraydestroy.done7: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@_ZN1SIiEC1ERKS0_2St -// CHECK13-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 { +// CHECK13-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 { // CHECK13-NEXT: entry: -// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK13-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK13-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK13-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 -// CHECK13-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 -// CHECK13-NEXT: call void @_ZN1SIiEC2ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP0]], %struct.St* noundef [[T]]) #[[ATTR4]] +// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK13-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK13-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK13-NEXT: store %struct.S.2* [[S]], %struct.S.2** [[S_ADDR]], align 8 +// CHECK13-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.S.2*, %struct.S.2** [[S_ADDR]], align 8 +// CHECK13-NEXT: call void @_ZN1SIiEC2ERKS0_2St(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[TMP0]], %struct.St* noundef [[T]]) #[[ATTR4]] // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK13-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK13-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[TMP:%.*]] = alloca %struct.S.2*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK13-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK13-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK13-NEXT: [[AGG_TMP8:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK13-NEXT: [[_TMP9:%.*]] = alloca %struct.S.0*, align 8 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK13-NEXT: [[AGG_TMP4:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK13-NEXT: [[_TMP5:%.*]] = alloca %struct.S.2*, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK13-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK13-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK13-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 +// CHECK13-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load [2 x %struct.S.2]*, [2 x %struct.S.2]** [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP12:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP11]], align 8 +// CHECK13-NEXT: store %struct.S.2* [[TMP12]], %struct.S.2** [[TMP]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK13-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK13-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i64 8, i1 false) -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK13-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 -// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK13-NEXT: [[TMP15:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK13-NEXT: [[TMP16:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP15]], i8* align 4 [[TMP16]], i64 8, i1 false) +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP17:%.*]] = bitcast [2 x %struct.S.2]* [[TMP10]] to %struct.S.2* +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.2* [[ARRAY_BEGIN]], [[TMP18]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK13: omp.arraycpy.body: -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK13-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP17]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK13-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.2* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] -// CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK13: omp.arraycpy.done6: -// CHECK13-NEXT: [[TMP9:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK13-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP8]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR7]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP9]], %struct.St* noundef [[AGG_TMP8]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP8]]) #[[ATTR5]] -// CHECK13-NEXT: store %struct.S.0* [[VAR7]], %struct.S.0** [[_TMP9]], align 8 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.2* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP18]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK13: omp.arraycpy.done3: +// CHECK13-NEXT: [[TMP19:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP]], align 8 +// CHECK13-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[TMP19]], %struct.St* noundef [[AGG_TMP4]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) #[[ATTR5]] +// CHECK13-NEXT: store %struct.S.2* [[VAR]], %struct.S.2** [[_TMP5]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP21]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP22]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK13-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK13-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !18 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 -// CHECK13-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP9]], align 8, !llvm.access.group !18 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK13-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK13-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i64 0, i64 [[IDXPROM11]] -// CHECK13-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX12]] to i8* -// CHECK13-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false), !llvm.access.group !18 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: store i32 [[TMP28]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP30:%.*]] = load %struct.S.2*, %struct.S.2** [[_TMP5]], align 8, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK13-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i64 0, i64 [[IDXPROM7]] +// CHECK13-NEXT: [[TMP32:%.*]] = bitcast %struct.S.2* [[ARRAYIDX8]] to i8* +// CHECK13-NEXT: [[TMP33:%.*]] = bitcast %struct.S.2* [[TMP30]] to i8* +// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i64 4, i1 false), !llvm.access.group !18 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK13-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK13-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP34]], 1 +// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK13-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK13-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK13-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 2, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: -// CHECK13-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN14]], i64 2 +// CHECK13-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN10]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP29]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK13-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK13: arraydestroy.done15: +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP39]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK13-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK13: arraydestroy.done11: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK13-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 { +// CHECK13-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 { // CHECK13-NEXT: entry: -// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK13-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK13-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK13-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR5]] +// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK13-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK13-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK13-NEXT: call void @_ZN1SIiED2Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR5]] // CHECK13-NEXT: ret void // // @@ -4238,25 +4414,25 @@ // // // CHECK13-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK13-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 { +// CHECK13-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 { // CHECK13-NEXT: entry: -// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK13-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK13-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK13-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK13-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@_ZN1SIiEC2ERKS0_2St -// CHECK13-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 { +// CHECK13-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 { // CHECK13-NEXT: entry: -// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK13-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK13-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK13-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 -// CHECK13-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK13-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 -// CHECK13-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK13-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK13-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK13-NEXT: store %struct.S.2* [[S]], %struct.S.2** [[S_ADDR]], align 8 +// CHECK13-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK13-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], %struct.S.2* [[THIS1]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.S.2*, %struct.S.2** [[S_ADDR]], align 8 +// CHECK13-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[TMP0]], i32 0, i32 0 // CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[F2]], align 4 // CHECK13-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.St* [[T]], i32 0, i32 0 // CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 @@ -4273,8 +4449,7 @@ // CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 // CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 // CHECK15-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK15-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK15-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK15-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 @@ -4283,141 +4458,166 @@ // CHECK15-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 // CHECK15-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], i32* [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[SIVAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32 [[TMP4]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP2]], i32 [[TMP6]]) +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP6]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK15-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK15-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK15-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK15-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK15-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK15-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[SIVAR]], i32* [[SIVAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], i32* [[SIVAR]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC1]] to i8* -// CHECK15-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i32 8, i1 false) -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 -// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK15-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK15-NEXT: [[TMP12:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 8, i1 false) +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S]* [[TMP6]] to %struct.S* +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK15: omp.arraycpy.body: -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4:[0-9]+]] // CHECK15-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5:[0-9]+]] // CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK15: omp.arraycpy.done3: -// CHECK15-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* noundef [[AGG_TMP5]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR5]] -// CHECK15-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK15: omp.arraycpy.done1: +// CHECK15-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP8]], %struct.St* noundef [[AGG_TMP2]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR5]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK15-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: store i32 [[TMP16]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: store i32 [[TMP18]], i32* [[SIVAR_CASTED]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], [2 x i32]* [[VEC1]], i32 [[TMP17]], [2 x %struct.S]* [[S_ARR2]], %struct.S* [[VAR4]], i32 [[TMP19]]), !llvm.access.group !7 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP28]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 [[TMP30]], i32* [[TMP29]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP31]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK15-NEXT: store %struct.S* [[VAR]], %struct.S** [[TMP32]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 [[TMP34]], i32* [[TMP33]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !7 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK15-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK15-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) +// CHECK15-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK15-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 2, i32* [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK15: .omp.final.done: -// CHECK15-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i32 2 +// CHECK15-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN4]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP26]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP41]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK15: arraydestroy.done8: +// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK15: arraydestroy.done5: // CHECK15-NEXT: ret void // // @@ -4455,145 +4655,153 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK15-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK15-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK15-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK15-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK15-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK15-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[SIVAR]], i32* [[SIVAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[TMP11]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], i32* [[SIVAR]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC1]] to i8* -// CHECK15-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i32 8, i1 false) -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK15-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 -// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK15-NEXT: [[TMP17:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK15-NEXT: [[TMP18:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP17]], i8* align 4 [[TMP18]], i32 8, i1 false) +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP19:%.*]] = bitcast [2 x %struct.S]* [[TMP10]] to %struct.S* +// CHECK15-NEXT: [[TMP20:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP20]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK15: omp.arraycpy.body: -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP19]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] // CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK15: omp.arraycpy.done3: -// CHECK15-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* noundef [[AGG_TMP5]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR5]] -// CHECK15-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP20]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK15: omp.arraycpy.done1: +// CHECK15-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP12]], %struct.St* noundef [[AGG_TMP2]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR5]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP22]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP23]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK15-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !11 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC1]], i32 0, i32 [[TMP18]] -// CHECK15-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK15-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 [[TMP19]] -// CHECK15-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* -// CHECK15-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR4]] to i8* -// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false), !llvm.access.group !11 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !11 -// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK15-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP30]] +// CHECK15-NEXT: store i32 [[TMP29]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP31]] +// CHECK15-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* +// CHECK15-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i32 4, i1 false), !llvm.access.group !11 +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP35:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP35]], [[TMP34]] +// CHECK15-NEXT: store i32 [[ADD5]], i32* [[SIVAR]], align 4, !llvm.access.group !11 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK15-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP36]], 1 +// CHECK15-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK15-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) +// CHECK15-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK15-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 2, i32* [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK15: .omp.final.done: -// CHECK15-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i32 2 +// CHECK15-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP29]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP41]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK15: arraydestroy.done11: +// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK15: arraydestroy.done8: // CHECK15-NEXT: ret void // // @@ -4608,321 +4816,351 @@ // // // CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81 -// CHECK15-SAME: ([2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: ([2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.1]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 // CHECK15-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK15-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 -// CHECK15-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.1]*, align 4 +// CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK15-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK15-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK15-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 +// CHECK15-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[S_ARR_ADDR]], align 4 +// CHECK15-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[VAR_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK15-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], i32* [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32 [[TMP4]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP5]]) +// CHECK15-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[S_ARR_ADDR]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR_ADDR]], align 4 +// CHECK15-NEXT: store %struct.S.1* [[TMP2]], %struct.S.1** [[TMP]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store [2 x %struct.S.1]* [[TMP1]], [2 x %struct.S.1]** [[TMP6]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK15-NEXT: store %struct.S.1* [[TMP8]], %struct.S.1** [[TMP7]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK15-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK15-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK15-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK15-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK15-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK15-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK15-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK15-NEXT: [[_TMP4:%.*]] = alloca %struct.S.1*, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK15-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 +// CHECK15-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP7]], align 4 +// CHECK15-NEXT: store %struct.S.1* [[TMP8]], %struct.S.1** [[TMP]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK15-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i32 8, i1 false) -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 -// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK15-NEXT: [[TMP9:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK15-NEXT: [[TMP10:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 [[TMP10]], i32 8, i1 false) +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP11:%.*]] = bitcast [2 x %struct.S.1]* [[TMP6]] to %struct.S.1* +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN]], [[TMP12]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK15: omp.arraycpy.body: -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK15-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP11]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK15-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] -// CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK15: omp.arraycpy.done4: -// CHECK15-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK15-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.St* noundef [[AGG_TMP6]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR5]] -// CHECK15-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP7]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK15: omp.arraycpy.done2: +// CHECK15-NEXT: [[TMP13:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK15-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP13]], %struct.St* noundef [[AGG_TMP3]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR5]] +// CHECK15-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP4]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 -// CHECK15-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK15-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !16 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !16 -// CHECK15-NEXT: store i32 [[TMP17]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !16 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !16 -// CHECK15-NEXT: [[TMP19:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !16 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], [2 x i32]* [[VEC2]], i32 [[TMP18]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP19]]), !llvm.access.group !16 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: store i32 [[TMP24]], i32* [[TMP23]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: store i32 [[TMP26]], i32* [[TMP25]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP27]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP30]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP32:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP4]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: store %struct.S.1* [[TMP32]], %struct.S.1** [[TMP31]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !16 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !16 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK15-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK15-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK15-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 2, i32* [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK15: .omp.final.done: -// CHECK15-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN9]], i32 2 +// CHECK15-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN6]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP26]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK15-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] -// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK15: arraydestroy.done10: +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP39]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK15-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK15: arraydestroy.done7: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@_ZN1SIiEC1ERKS0_2St -// CHECK15-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 { +// CHECK15-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 { // CHECK15-NEXT: entry: -// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK15-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK15-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK15-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 4 -// CHECK15-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 4 -// CHECK15-NEXT: call void @_ZN1SIiEC2ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP0]], %struct.St* noundef [[T]]) #[[ATTR4]] +// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK15-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK15-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK15-NEXT: store %struct.S.1* [[S]], %struct.S.1** [[S_ADDR]], align 4 +// CHECK15-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.S.1*, %struct.S.1** [[S_ADDR]], align 4 +// CHECK15-NEXT: call void @_ZN1SIiEC2ERKS0_2St(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP0]], %struct.St* noundef [[T]]) #[[ATTR4]] // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK15-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK15-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK15-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK15-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK15-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK15-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK15-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK15-NEXT: [[_TMP4:%.*]] = alloca %struct.S.1*, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK15-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 +// CHECK15-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP12:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP11]], align 4 +// CHECK15-NEXT: store %struct.S.1* [[TMP12]], %struct.S.1** [[TMP]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK15-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i32 8, i1 false) -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK15-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 -// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK15-NEXT: [[TMP15:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK15-NEXT: [[TMP16:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP15]], i8* align 4 [[TMP16]], i32 8, i1 false) +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP17:%.*]] = bitcast [2 x %struct.S.1]* [[TMP10]] to %struct.S.1* +// CHECK15-NEXT: [[TMP18:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN]], [[TMP18]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK15: omp.arraycpy.body: -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK15-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP17]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK15-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] -// CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK15: omp.arraycpy.done4: -// CHECK15-NEXT: [[TMP9:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK15-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP9]], %struct.St* noundef [[AGG_TMP6]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR5]] -// CHECK15-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP7]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP18]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK15: omp.arraycpy.done2: +// CHECK15-NEXT: [[TMP19:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK15-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP19]], %struct.St* noundef [[AGG_TMP3]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR5]] +// CHECK15-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP4]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP21]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP22]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 -// CHECK15-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK15-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !19 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP19]] -// CHECK15-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 -// CHECK15-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !19 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 -// CHECK15-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 [[TMP21]] -// CHECK15-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX9]] to i8* -// CHECK15-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false), !llvm.access.group !19 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP29]] +// CHECK15-NEXT: store i32 [[TMP28]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP30:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP4]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 [[TMP31]] +// CHECK15-NEXT: [[TMP32:%.*]] = bitcast %struct.S.1* [[ARRAYIDX6]] to i8* +// CHECK15-NEXT: [[TMP33:%.*]] = bitcast %struct.S.1* [[TMP30]] to i8* +// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i32 4, i1 false), !llvm.access.group !19 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK15-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK15-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], 1 +// CHECK15-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK15-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK15-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK15-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 2, i32* [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK15: .omp.final.done: -// CHECK15-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN11]], i32 2 +// CHECK15-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN8]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP29]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK15-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK15: arraydestroy.done12: +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP39]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK15-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK15: arraydestroy.done9: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK15-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 { +// CHECK15-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 { // CHECK15-NEXT: entry: -// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK15-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK15-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK15-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR5]] +// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK15-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK15-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK15-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR5]] // CHECK15-NEXT: ret void // // @@ -4978,25 +5216,25 @@ // // // CHECK15-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK15-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 { +// CHECK15-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 { // CHECK15-NEXT: entry: -// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK15-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK15-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK15-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK15-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@_ZN1SIiEC2ERKS0_2St -// CHECK15-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 { +// CHECK15-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 { // CHECK15-NEXT: entry: -// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK15-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK15-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK15-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 4 -// CHECK15-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK15-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 4 -// CHECK15-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK15-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK15-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK15-NEXT: store %struct.S.1* [[S]], %struct.S.1** [[S_ADDR]], align 4 +// CHECK15-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK15-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.S.1*, %struct.S.1** [[S_ADDR]], align 4 +// CHECK15-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[TMP0]], i32 0, i32 0 // CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[F2]], align 4 // CHECK15-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.St* [[T]], i32 0, i32 0 // CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 @@ -5012,9 +5250,7 @@ // CHECK17-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK17-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 // CHECK17-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 // CHECK17-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 @@ -5022,110 +5258,116 @@ // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK17-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load volatile i32, i32* [[TMP2]], align 4 -// CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK17-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* @g, align 4 +// CHECK17-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 -// CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]]) +// CHECK17-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK17-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[G_ADDR]] to i32* -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* -// CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK17-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 +// CHECK17-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[G]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], i32* [[G1]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK17-NEXT: store i32 [[TMP6]], i32* [[SIVAR]], align 4 +// CHECK17-NEXT: store i32* [[G1]], i32** [[TMP]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] -// CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 -// CHECK17-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK17-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !5 -// CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[G_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4, !llvm.access.group !5 -// CHECK17-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8, !llvm.access.group !5 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !5 -// CHECK17-NEXT: [[TMP14:%.*]] = load volatile i32, i32* [[TMP13]], align 4, !llvm.access.group !5 -// CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[G1_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4, !llvm.access.group !5 -// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8, !llvm.access.group !5 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 4, !llvm.access.group !5 -// CHECK17-NEXT: [[CONV7:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4, !llvm.access.group !5 -// CHECK17-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !5 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]), !llvm.access.group !5 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK17-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK17-NEXT: store i64 [[TMP17]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !5 +// CHECK17-NEXT: store i64 [[TMP19]], i64* [[TMP18]], align 8, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !5 +// CHECK17-NEXT: store i64 [[TMP21]], i64* [[TMP20]], align 8, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[G]], align 4, !llvm.access.group !5 +// CHECK17-NEXT: store i32 [[TMP23]], i32* [[TMP22]], align 8, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* @g, align 4, !llvm.access.group !5 +// CHECK17-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !5 +// CHECK17-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 8, !llvm.access.group !5 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !5 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK17-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK17-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 2, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5134,18 +5376,19 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 @@ -5154,79 +5397,88 @@ // CHECK17-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[G_ADDR]] to i32* -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* -// CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK17-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 +// CHECK17-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], i32* [[G]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], i32* [[G1]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 8 +// CHECK17-NEXT: store i32 [[TMP10]], i32* [[SIVAR]], align 4 +// CHECK17-NEXT: store i32* [[G1]], i32** [[TMP]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP11]] to i32 +// CHECK17-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 +// CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: store i32 1, i32* [[CONV]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !9 -// CHECK17-NEXT: store volatile i32 1, i32* [[TMP10]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: store i32 2, i32* [[CONV2]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 0 -// CHECK17-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8, !llvm.access.group !9 -// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !9 -// CHECK17-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8, !llvm.access.group !9 -// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 2 -// CHECK17-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8, !llvm.access.group !9 +// CHECK17-NEXT: store i32 1, i32* [[G]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !9 +// CHECK17-NEXT: store volatile i32 1, i32* [[TMP21]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: store i32 2, i32* [[SIVAR]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 0 +// CHECK17-NEXT: store i32* [[G]], i32** [[TMP22]], align 8, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !9 +// CHECK17-NEXT: store i32* [[TMP24]], i32** [[TMP23]], align 8, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 2 +// CHECK17-NEXT: store i32* [[SIVAR]], i32** [[TMP25]], align 8, !llvm.access.group !9 // CHECK17-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR3:[0-9]+]], !llvm.access.group !9 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK17-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK17-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]]) +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK17-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 2, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp @@ -154,85 +154,97 @@ // CHECK1-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK1-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[ARG:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[ARG:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[ARG]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !9 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !9 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !9 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !9 +// CHECK1-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[ARG]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 8, !llvm.access.group !9 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !9 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -241,13 +253,14 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[ARG:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[ARG:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -257,63 +270,70 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[ARG]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: store i32 0, i32* [[CONV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: store i32 0, i32* [[ARG]], align 4, !llvm.access.group !13 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -324,75 +344,90 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l53 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !18 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !18 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !18 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !18 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !18 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !18 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !18 -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !18 +// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP16]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !18 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !18 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -401,12 +436,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -416,43 +452,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 // CHECK1-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !21 @@ -460,17 +502,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -547,70 +589,85 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l78 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !24 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !24 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !24 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !24 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !24 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -619,12 +676,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -634,43 +692,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 // CHECK1-NEXT: call void @_Z3fn4v(), !llvm.access.group !27 @@ -678,17 +742,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -699,75 +763,90 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l85 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !30 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !30 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !30 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !30 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !30 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !30 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !30 -// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !30 -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !30 +// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP16]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !30 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !30 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -776,12 +855,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -791,43 +871,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 // CHECK1-NEXT: call void @_Z3fn5v(), !llvm.access.group !33 @@ -835,17 +921,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -857,96 +943,112 @@ // CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK1-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[CONV1]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[TMP0]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK1-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !36 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !36 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !36 +// CHECK1-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !36 +// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !36 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !36 // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !36 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !36 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !36 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !36 -// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !36 -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 +// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP19]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !36 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !36 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -955,12 +1057,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -970,43 +1073,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 // CHECK1-NEXT: call void @_Z3fn6v(), !llvm.access.group !39 @@ -1014,17 +1123,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1090,70 +1199,85 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l62 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..12 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !42 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !42 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !42 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !42 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !42 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1162,12 +1286,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1177,43 +1302,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 // CHECK1-NEXT: call void @_Z3fn1v(), !llvm.access.group !45 @@ -1221,17 +1352,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1242,75 +1373,90 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l66 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..14 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.11*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !48 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !48 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !48 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !48 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !48 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !48 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !48 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8, !llvm.access.group !48 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !48 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !48 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !48 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !48 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !48 -// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !48 -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !48 +// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP16]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !48 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !48 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !48 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !48 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1319,12 +1465,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1334,43 +1481,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !51 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !51 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !51 // CHECK1-NEXT: call void @_Z3fn2v(), !llvm.access.group !51 @@ -1378,17 +1531,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1400,96 +1553,112 @@ // CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 1 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK1-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[CONV1]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[TMP0]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.13*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK1-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !54 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !54 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !54 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !54 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !54 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !54 +// CHECK1-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !54 +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !54 +// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !54 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !54 // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !54 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !54 -// CHECK1-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 +// CHECK1-NEXT: call void @.omp_outlined..17(i32* [[TMP19]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !54 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !54 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1498,12 +1667,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..17 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1513,43 +1683,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !57 // CHECK1-NEXT: call void @_Z3fn3v(), !llvm.access.group !57 @@ -1557,17 +1733,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1628,85 +1804,97 @@ // CHECK3-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK3-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[ARG:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK3-NEXT: [[ARG:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[ARG]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !nontemporal !10, !llvm.access.group !9 -// CHECK3-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK3-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !9 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !9 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK3-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !9 +// CHECK3-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !9 +// CHECK3-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[ARG]], align 4, !nontemporal !10, !llvm.access.group !9 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 8, !llvm.access.group !9 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1715,13 +1903,14 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[ARG:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[ARG:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1731,63 +1920,70 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK3-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK3-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[ARG]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK3-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK3-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK3-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 -// CHECK3-NEXT: store i32 0, i32* [[CONV]], align 4, !nontemporal !10, !llvm.access.group !14 +// CHECK3-NEXT: store i32 0, i32* [[ARG]], align 4, !nontemporal !10, !llvm.access.group !14 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1798,75 +1994,90 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l53 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !19 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !19 +// CHECK3-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !19 +// CHECK3-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !19 +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !19 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !19 // CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !19 -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !19 +// CHECK3-NEXT: call void @.omp_outlined..3(i32* [[TMP16]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !19 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !19 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1875,12 +2086,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1890,43 +2102,49 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK3-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK3-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 // CHECK3-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !22 @@ -1934,17 +2152,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2021,70 +2239,85 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l78 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !25 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !25 +// CHECK3-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !25 +// CHECK3-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !25 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !25 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2093,12 +2326,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2108,43 +2342,49 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK3-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK3-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !28 // CHECK3-NEXT: call void @_Z3fn4v(), !llvm.access.group !28 @@ -2152,17 +2392,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2173,75 +2413,90 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l85 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @.omp_outlined..7(i32* [[TMP16]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2250,12 +2505,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2265,43 +2521,49 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK3-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK3-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK3-NEXT: call void @_Z3fn5v() @@ -2309,17 +2571,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2331,152 +2593,173 @@ // CHECK3-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK3-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[CONV1]], align 1 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[TMP0]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED11:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_11:%.*]] = alloca [[STRUCT_ANON_8]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK3-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE6:%.*]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP8]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE7:%.*]] // CHECK3: omp_if.then: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 -// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 -// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !34 -// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 -// CHECK3-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* -// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !34 -// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !34 -// CHECK3-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !34 -// CHECK3-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] -// CHECK3: omp_if.then5: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !34 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK3-NEXT: store i64 [[TMP12]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 +// CHECK3-NEXT: store i64 [[TMP14]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !34 +// CHECK3-NEXT: store i64 [[TMP16]], i64* [[TMP15]], align 8, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !34 +// CHECK3-NEXT: store i64 [[TMP18]], i64* [[TMP17]], align 8, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP20:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !34 +// CHECK3-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP20]] to i1 +// CHECK3-NEXT: [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL3]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL4]], i8* [[TMP19]], align 8, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP21:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !34 +// CHECK3-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP21]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL5]], label [[OMP_IF_THEN6:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3: omp_if.then6: +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !34 // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !34 +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !34 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !34 // CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !34 -// CHECK3-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !34 -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 +// CHECK3-NEXT: call void @.omp_outlined..9(i32* [[TMP22]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !34 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !34 // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END22:%.*]] -// CHECK3: omp_if.else6: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] -// CHECK3: omp.inner.for.cond7: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END21:%.*]] -// CHECK3: omp.inner.for.body9: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK3-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK3-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP24]] to i1 -// CHECK3-NEXT: [[CONV12:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED11]] to i8* -// CHECK3-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL10]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL13]], i8* [[CONV12]], align 1 -// CHECK3-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED11]], align 8 -// CHECK3-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK3-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP26]] to i1 +// CHECK3: omp_if.else7: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] +// CHECK3: omp.inner.for.cond8: +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK3-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END21:%.*]] +// CHECK3: omp.inner.for.body10: +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK3-NEXT: store i64 [[TMP28]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = zext i32 [[TMP29]] to i64 +// CHECK3-NEXT: store i64 [[TMP30]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP32]], i64* [[TMP31]], align 8 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: store i64 [[TMP34]], i64* [[TMP33]], align 8 +// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP36:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL12:%.*]] = trunc i8 [[TMP36]] to i1 +// CHECK3-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL12]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL13]], i8* [[TMP35]], align 8 +// CHECK3-NEXT: [[TMP37:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP37]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]] // CHECK3: omp_if.then15: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_11]]) // CHECK3-NEXT: br label [[OMP_IF_END18:%.*]] // CHECK3: omp_if.else16: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..10(i32* [[TMP27]], i32* [[DOTBOUND_ZERO_ADDR17]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @.omp_outlined..10(i32* [[TMP38]], i32* [[DOTBOUND_ZERO_ADDR17]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_11]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]) // CHECK3-NEXT: br label [[OMP_IF_END18]] // CHECK3: omp_if.end18: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC19:%.*]] // CHECK3: omp.inner.for.inc19: -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK3-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP37:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK3: omp.inner.for.end21: // CHECK3-NEXT: br label [[OMP_IF_END22]] // CHECK3: omp_if.end22: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK3-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK3-NEXT: br i1 [[TMP42]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2485,13 +2768,14 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2501,49 +2785,58 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK3-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK3-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK3-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 8 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK3-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK3-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK3-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !38 // CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group !38 @@ -2551,38 +2844,38 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP14]], 99 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK3: cond.true6: // CHECK3-NEXT: br label [[COND_END8:%.*]] // CHECK3: cond.false7: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END8]] // CHECK3: cond.end8: -// CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP15]], [[COND_FALSE7]] ] +// CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] // CHECK3-NEXT: store i32 [[COND9]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] // CHECK3: omp.inner.for.cond10: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK3-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] // CHECK3: omp.inner.for.body12: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] // CHECK3-NEXT: store i32 [[ADD14]], i32* [[I]], align 4 // CHECK3-NEXT: call void @_Z3fn6v() @@ -2590,8 +2883,8 @@ // CHECK3: omp.body.continue15: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] // CHECK3: omp.inner.for.inc16: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK3-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK3: omp.inner.for.end18: @@ -2599,12 +2892,12 @@ // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP29]]) +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK3-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2613,13 +2906,14 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2629,49 +2923,58 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK3-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK3-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK3-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 8 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK3-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK3-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK3-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !42 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !42 // CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group !42 @@ -2679,38 +2982,38 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP14]], 99 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK3: cond.true6: // CHECK3-NEXT: br label [[COND_END8:%.*]] // CHECK3: cond.false7: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END8]] // CHECK3: cond.end8: -// CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP15]], [[COND_FALSE7]] ] +// CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] // CHECK3-NEXT: store i32 [[COND9]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] // CHECK3: omp.inner.for.cond10: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK3-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] // CHECK3: omp.inner.for.body12: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] // CHECK3-NEXT: store i32 [[ADD14]], i32* [[I]], align 4 // CHECK3-NEXT: call void @_Z3fn6v() @@ -2718,8 +3021,8 @@ // CHECK3: omp.body.continue15: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] // CHECK3: omp.inner.for.inc16: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK3-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK3: omp.inner.for.end18: @@ -2727,12 +3030,12 @@ // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP29]]) +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK3-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2798,70 +3101,85 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l62 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..13 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !46 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !46 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !46 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !46 +// CHECK3-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8, !llvm.access.group !46 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !46 +// CHECK3-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !46 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.11*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !46 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2870,12 +3188,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2885,43 +3204,49 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], %struct.anon.11* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK3-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK3-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49 // CHECK3-NEXT: call void @_Z3fn1v(), !llvm.access.group !49 @@ -2929,17 +3254,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2950,75 +3275,90 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l66 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..15 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.12*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..16(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @.omp_outlined..16(i32* [[TMP16]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3027,12 +3367,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3042,43 +3383,49 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK3-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK3-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK3-NEXT: call void @_Z3fn2v() @@ -3086,17 +3433,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3108,96 +3455,112 @@ // CHECK3-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 1 // CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK3-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[CONV1]], align 1 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[TMP0]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..17 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK3-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !54 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !54 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 +// CHECK3-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK3-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !54 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !54 +// CHECK3-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !54 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !54 +// CHECK3-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !54 +// CHECK3-NEXT: [[TMP18:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !54 +// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !54 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.15*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !54 // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !54 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 // CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !54 -// CHECK3-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 +// CHECK3-NEXT: call void @.omp_outlined..18(i32* [[TMP19]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !54 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !54 // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3206,12 +3569,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.15* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.15*, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3221,43 +3585,49 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.15* [[__CONTEXT]], %struct.anon.15** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.15*, %struct.anon.15** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], %struct.anon.15* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK3-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK3-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !57 // CHECK3-NEXT: call void @_Z3fn3v(), !llvm.access.group !57 @@ -3265,17 +3635,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3929,85 +4299,97 @@ // CHECK9-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK9-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[ARG:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK9-NEXT: [[ARG:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[ARG]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !13 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !13 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !13 +// CHECK9-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !13 +// CHECK9-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[ARG]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 8, !llvm.access.group !13 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !13 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4016,13 +4398,14 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[ARG:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[ARG:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4032,63 +4415,70 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], i32* [[ARG]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: store i32 0, i32* [[CONV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: store i32 0, i32* [[ARG]], align 4, !llvm.access.group !17 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK9-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4099,75 +4489,90 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l53 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !22 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !22 +// CHECK9-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !22 +// CHECK9-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !22 +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !22 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !22 // CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !22 -// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !22 -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !22 +// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP16]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !22 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !22 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4176,12 +4581,13 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4191,43 +4597,49 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 // CHECK9-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !25 @@ -4235,17 +4647,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4322,70 +4734,85 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l78 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !28 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !28 +// CHECK9-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !28 +// CHECK9-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !28 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !28 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK9-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4394,12 +4821,13 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4409,43 +4837,49 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !31 // CHECK9-NEXT: call void @_Z3fn4v(), !llvm.access.group !31 @@ -4453,17 +4887,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4474,75 +4908,90 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l85 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !34 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !34 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !34 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !34 +// CHECK9-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8, !llvm.access.group !34 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !34 +// CHECK9-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !34 +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !34 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !34 // CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !34 -// CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !34 -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 +// CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP16]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !34 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !34 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4551,12 +5000,13 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4566,43 +5016,49 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !37 // CHECK9-NEXT: call void @_Z3fn5v(), !llvm.access.group !37 @@ -4610,17 +5066,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4632,96 +5088,112 @@ // CHECK9-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK9-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK9-NEXT: store i8 [[FROMBOOL]], i8* [[CONV1]], align 1 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK9-NEXT: store i8 [[FROMBOOL]], i8* [[TMP0]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK9-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK9-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !40 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !40 -// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !40 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !40 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !40 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !40 +// CHECK9-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !40 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !40 +// CHECK9-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !40 +// CHECK9-NEXT: [[TMP18:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !40 +// CHECK9-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK9-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !40 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !40 // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !40 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !40 +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !40 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !40 // CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !40 -// CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !40 -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !40 +// CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP19]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !40 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !40 // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !40 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !40 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4730,12 +5202,13 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4745,43 +5218,49 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 // CHECK9-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 @@ -4789,17 +5268,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4865,70 +5344,85 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l62 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..12 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !46 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !46 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !46 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !46 +// CHECK9-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8, !llvm.access.group !46 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !46 +// CHECK9-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !46 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !46 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK9-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4937,12 +5431,13 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4952,43 +5447,49 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49 // CHECK9-NEXT: call void @_Z3fn1v(), !llvm.access.group !49 @@ -4996,17 +5497,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5017,75 +5518,90 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l66 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..14 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.11*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !52 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !52 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !52 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !52 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !52 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !52 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !52 +// CHECK9-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8, !llvm.access.group !52 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !52 +// CHECK9-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !52 +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !52 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !52 // CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !52 -// CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !52 -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !52 +// CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP16]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !52 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !52 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !52 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !52 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5094,12 +5610,13 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5109,43 +5626,49 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !55 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !55 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !55 // CHECK9-NEXT: call void @_Z3fn2v(), !llvm.access.group !55 @@ -5153,17 +5676,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5175,96 +5698,112 @@ // CHECK9-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 1 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK9-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK9-NEXT: store i8 [[FROMBOOL]], i8* [[CONV1]], align 1 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK9-NEXT: store i8 [[FROMBOOL]], i8* [[TMP0]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.13*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK9-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK9-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !58 -// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !58 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !58 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !58 +// CHECK9-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !58 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !58 +// CHECK9-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !58 +// CHECK9-NEXT: [[TMP18:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !58 +// CHECK9-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK9-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !58 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !58 // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !58 +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !58 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !58 // CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !58 -// CHECK9-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !58 -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 +// CHECK9-NEXT: call void @.omp_outlined..17(i32* [[TMP19]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !58 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !58 // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5273,12 +5812,13 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..17 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5288,43 +5828,49 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !61 // CHECK9-NEXT: call void @_Z3fn3v(), !llvm.access.group !61 @@ -5332,17 +5878,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5403,85 +5949,97 @@ // CHECK11-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK11-NEXT: [[CONV1:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK11-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[ARG:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK11-NEXT: [[ARG:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[ARG]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !nontemporal !14, !llvm.access.group !13 -// CHECK11-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK11-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !13 -// CHECK11-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !13 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !13 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK11-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !13 +// CHECK11-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !13 +// CHECK11-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[ARG]], align 4, !nontemporal !14, !llvm.access.group !13 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 8, !llvm.access.group !13 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !13 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK11-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5490,13 +6048,14 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[ARG:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[ARG:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5506,63 +6065,70 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK11-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK11-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[ARG]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK11-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK11-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK11-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: store i32 0, i32* [[CONV]], align 4, !nontemporal !14, !llvm.access.group !18 +// CHECK11-NEXT: store i32 0, i32* [[ARG]], align 4, !nontemporal !14, !llvm.access.group !18 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK11-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5573,75 +6139,90 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l53 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !23 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !23 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !23 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !23 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !23 +// CHECK11-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8, !llvm.access.group !23 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !23 +// CHECK11-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !23 +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !23 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !23 // CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !23 -// CHECK11-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !23 -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !23 +// CHECK11-NEXT: call void @.omp_outlined..3(i32* [[TMP16]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !23 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !23 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK11-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5650,12 +6231,13 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5665,43 +6247,49 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK11-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK11-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !26 // CHECK11-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !26 @@ -5709,17 +6297,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5796,70 +6384,85 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l78 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !29 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !29 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !29 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !29 +// CHECK11-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8, !llvm.access.group !29 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !29 +// CHECK11-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !29 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !29 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK11-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5868,12 +6471,13 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5883,43 +6487,49 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK11-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK11-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !32 // CHECK11-NEXT: call void @_Z3fn4v(), !llvm.access.group !32 @@ -5927,17 +6537,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5948,75 +6558,90 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l85 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @.omp_outlined..7(i32* [[TMP16]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK11-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6025,12 +6650,13 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6040,43 +6666,49 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK11-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK11-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK11-NEXT: call void @_Z3fn5v() @@ -6084,17 +6716,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6106,152 +6738,173 @@ // CHECK11-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK11-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 -// CHECK11-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[CONV1]], align 1 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[TMP0]], align 1 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED11:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_11:%.*]] = alloca [[STRUCT_ANON_8]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK11-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE6:%.*]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP8]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE7:%.*]] // CHECK11: omp_if.then: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38 -// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 -// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !38 -// CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 -// CHECK11-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* -// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !38 -// CHECK11-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !38 -// CHECK11-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !38 -// CHECK11-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] -// CHECK11: omp_if.then5: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !38 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11: omp.inner.for.body: +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38 +// CHECK11-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK11-NEXT: store i64 [[TMP12]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !38 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 +// CHECK11-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 +// CHECK11-NEXT: store i64 [[TMP14]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !38 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !38 +// CHECK11-NEXT: store i64 [[TMP16]], i64* [[TMP15]], align 8, !llvm.access.group !38 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !38 +// CHECK11-NEXT: store i64 [[TMP18]], i64* [[TMP17]], align 8, !llvm.access.group !38 +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP20:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !38 +// CHECK11-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP20]] to i1 +// CHECK11-NEXT: [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL3]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL4]], i8* [[TMP19]], align 8, !llvm.access.group !38 +// CHECK11-NEXT: [[TMP21:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !38 +// CHECK11-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP21]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL5]], label [[OMP_IF_THEN6:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK11: omp_if.then6: +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !38 // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !38 +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !38 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !38 // CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !38 -// CHECK11-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !38 -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 +// CHECK11-NEXT: call void @.omp_outlined..9(i32* [[TMP22]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !38 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !38 // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END22:%.*]] -// CHECK11: omp_if.else6: -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] -// CHECK11: omp.inner.for.cond7: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END21:%.*]] -// CHECK11: omp.inner.for.body9: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK11-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK11-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP24]] to i1 -// CHECK11-NEXT: [[CONV12:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED11]] to i8* -// CHECK11-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL10]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL13]], i8* [[CONV12]], align 1 -// CHECK11-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED11]], align 8 -// CHECK11-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK11-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP26]] to i1 +// CHECK11: omp_if.else7: +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] +// CHECK11: omp.inner.for.cond8: +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END21:%.*]] +// CHECK11: omp.inner.for.body10: +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK11-NEXT: store i64 [[TMP28]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = zext i32 [[TMP29]] to i64 +// CHECK11-NEXT: store i64 [[TMP30]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP32]], i64* [[TMP31]], align 8 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: store i64 [[TMP34]], i64* [[TMP33]], align 8 +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP36:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL12:%.*]] = trunc i8 [[TMP36]] to i1 +// CHECK11-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL12]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL13]], i8* [[TMP35]], align 8 +// CHECK11-NEXT: [[TMP37:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP37]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]] // CHECK11: omp_if.then15: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_11]]) // CHECK11-NEXT: br label [[OMP_IF_END18:%.*]] // CHECK11: omp_if.else16: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..10(i32* [[TMP27]], i32* [[DOTBOUND_ZERO_ADDR17]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @.omp_outlined..10(i32* [[TMP38]], i32* [[DOTBOUND_ZERO_ADDR17]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_11]]) #[[ATTR2]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]) // CHECK11-NEXT: br label [[OMP_IF_END18]] // CHECK11: omp_if.end18: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC19:%.*]] // CHECK11: omp.inner.for.inc19: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK11-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP41:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK11: omp.inner.for.end21: // CHECK11-NEXT: br label [[OMP_IF_END22]] // CHECK11: omp_if.end22: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK11-NEXT: br i1 [[TMP42]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6260,13 +6913,14 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6276,49 +6930,58 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK11-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK11-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK11-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 8 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK11-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK11-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK11-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !42 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !42 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !42 // CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group !42 @@ -6326,38 +6989,38 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP14]], 99 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK11: cond.true6: // CHECK11-NEXT: br label [[COND_END8:%.*]] // CHECK11: cond.false7: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END8]] // CHECK11: cond.end8: -// CHECK11-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP15]], [[COND_FALSE7]] ] +// CHECK11-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] // CHECK11-NEXT: store i32 [[COND9]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] // CHECK11: omp.inner.for.cond10: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] // CHECK11: omp.inner.for.body12: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK11-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] // CHECK11-NEXT: store i32 [[ADD14]], i32* [[I]], align 4 // CHECK11-NEXT: call void @_Z3fn6v() @@ -6365,8 +7028,8 @@ // CHECK11: omp.body.continue15: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] // CHECK11: omp.inner.for.inc16: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK11-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK11: omp.inner.for.end18: @@ -6374,12 +7037,12 @@ // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP29]]) +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6388,13 +7051,14 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6404,49 +7068,58 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK11-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK11-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK11-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 8 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK11-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK11-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK11-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !46 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !46 // CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group !46 @@ -6454,38 +7127,38 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP14]], 99 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK11: cond.true6: // CHECK11-NEXT: br label [[COND_END8:%.*]] // CHECK11: cond.false7: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END8]] // CHECK11: cond.end8: -// CHECK11-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP15]], [[COND_FALSE7]] ] +// CHECK11-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] // CHECK11-NEXT: store i32 [[COND9]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] // CHECK11: omp.inner.for.cond10: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] // CHECK11: omp.inner.for.body12: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK11-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] // CHECK11-NEXT: store i32 [[ADD14]], i32* [[I]], align 4 // CHECK11-NEXT: call void @_Z3fn6v() @@ -6493,8 +7166,8 @@ // CHECK11: omp.body.continue15: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] // CHECK11: omp.inner.for.inc16: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK11-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK11: omp.inner.for.end18: @@ -6502,12 +7175,12 @@ // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP29]]) +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6573,70 +7246,85 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l62 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..13 to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 1 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50 -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !50 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50 +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !50 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !50 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !50 +// CHECK11-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8, !llvm.access.group !50 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !50 +// CHECK11-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !50 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.11*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !50 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK11-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6645,12 +7333,13 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6660,43 +7349,49 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], %struct.anon.11* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK11-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK11-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !53 // CHECK11-NEXT: call void @_Z3fn1v(), !llvm.access.group !53 @@ -6704,17 +7399,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6725,75 +7420,90 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l66 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..15 to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 1 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.12*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8 +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..16(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @.omp_outlined..16(i32* [[TMP16]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK11-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6802,12 +7512,13 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6817,43 +7528,49 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK11-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK11-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK11-NEXT: call void @_Z3fn2v() @@ -6861,17 +7578,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6883,96 +7600,112 @@ // CHECK11-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 1 // CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK11-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 -// CHECK11-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[CONV1]], align 1 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[TMP0]], align 1 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..17 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK11-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !58 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !58 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 +// CHECK11-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK11-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !58 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !58 +// CHECK11-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !58 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !58 +// CHECK11-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !58 +// CHECK11-NEXT: [[TMP18:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !58 +// CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !58 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.15*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !58 // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !58 +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !58 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !58 // CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !58 -// CHECK11-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !58 -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 +// CHECK11-NEXT: call void @.omp_outlined..18(i32* [[TMP19]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !58 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !58 // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK11-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6981,12 +7714,13 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.15* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.15*, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6996,43 +7730,49 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.15* [[__CONTEXT]], %struct.anon.15** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.15*, %struct.anon.15** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], %struct.anon.15* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK11-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK11-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !61 // CHECK11-NEXT: call void @_Z3fn3v(), !llvm.access.group !61 @@ -7040,17 +7780,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp @@ -195,10 +195,7 @@ // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], i64* [[SVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SFVAR]], i64* [[SFVAR_ADDR]], align 8 @@ -208,274 +205,291 @@ // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[SFVAR_ADDR]] to float* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_ADDR]] to double* // CHECK1-NEXT: store double* [[CONV]], double** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load volatile double, double* [[TMP0]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to double* -// CHECK1-NEXT: store double [[TMP1]], double* [[CONV4]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load float, float* [[CONV2]], align 4 -// CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[SFVAR_CASTED]] to float* -// CHECK1-NEXT: store float [[TMP5]], float* [[CONV6]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load double, double* [[CONV3]], align 8 -// CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[G_CASTED]] to double* -// CHECK1-NEXT: store double [[TMP7]], double* [[CONV7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load volatile double, double* [[TMP1]], align 8 +// CHECK1-NEXT: store double [[TMP2]], double* [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[CONV2]], align 4 +// CHECK1-NEXT: store float [[TMP6]], float* [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double, double* [[CONV3]], align 8 +// CHECK1-NEXT: store double [[TMP8]], double* [[TMP7]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SVAR:%.*]], i64 noundef [[SFVAR:%.*]], i64 noundef [[G:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G5:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G16:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP7:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR9:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[G2:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G13:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SVAR]], i64* [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SFVAR]], i64* [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[G1_ADDR]] to double* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[SFVAR_ADDR]] to float* -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_ADDR]] to double* -// CHECK1-NEXT: store double* [[CONV]], double** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load double, double* [[TMP1]], align 8 +// CHECK1-NEXT: store double [[TMP2]], double* [[G1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[TMP5]], align 4 +// CHECK1-NEXT: store float [[TMP6]], float* [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double, double* [[TMP7]], align 8 +// CHECK1-NEXT: store double [[TMP8]], double* [[G]], align 8 +// CHECK1-NEXT: store double* [[G1]], double** [[TMP]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: store double* [[G16]], double** [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[G13]], double** [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = load double*, double** [[_TMP7]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP13:%.*]] = load volatile double, double* [[TMP12]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[CONV11:%.*]] = bitcast i64* [[G1_CASTED]] to double* -// CHECK1-NEXT: store double [[TMP13]], double* [[CONV11]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[G1_CASTED]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[SVAR8]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[CONV12:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP15]], i32* [[CONV12]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP17:%.*]] = load float, float* [[SFVAR9]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[CONV13:%.*]] = bitcast i64* [[SFVAR_CASTED]] to float* -// CHECK1-NEXT: store float [[TMP17]], float* [[CONV13]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP19:%.*]] = load double, double* [[G5]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[CONV14:%.*]] = bitcast i64* [[G_CASTED]] to double* -// CHECK1-NEXT: store double [[TMP19]], double* [[CONV14]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[G_CASTED]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]]), !llvm.access.group !4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// CHECK1-NEXT: store i64 [[TMP18]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK1-NEXT: store i64 [[TMP20]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: store i64 [[TMP22]], i64* [[TMP21]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: store i64 [[TMP24]], i64* [[TMP23]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP26:%.*]] = load double*, double** [[_TMP4]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP27:%.*]] = load volatile double, double* [[TMP26]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: store double [[TMP27]], double* [[TMP25]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[SVAR5]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load float, float* [[SFVAR6]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: store float [[TMP31]], float* [[TMP30]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP33:%.*]] = load double, double* [[G2]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: store double [[TMP33]], double* [[TMP32]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK1-NEXT: br i1 [[TMP39]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP27:%.*]] = load double, double* [[G5]], align 8 -// CHECK1-NEXT: store volatile double [[TMP27]], double* [[CONV3]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load double*, double** [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = load double, double* [[TMP28]], align 8 -// CHECK1-NEXT: store volatile double [[TMP29]], double* [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK1-NEXT: store i32 [[TMP30]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load float, float* [[SFVAR9]], align 4 -// CHECK1-NEXT: store float [[TMP31]], float* [[CONV2]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load double, double* [[G2]], align 8 +// CHECK1-NEXT: store volatile double [[TMP40]], double* [[G]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = load double*, double** [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = load double, double* [[TMP41]], align 8 +// CHECK1-NEXT: store volatile double [[TMP42]], double* [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[SVAR5]], align 4 +// CHECK1-NEXT: store i32 [[TMP43]], i32* [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load float, float* [[SFVAR6]], align 4 +// CHECK1-NEXT: store float [[TMP44]], float* [[SFVAR]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SVAR:%.*]], i64 noundef [[SFVAR:%.*]], i64 noundef [[G:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G7:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G18:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP9:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR10:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR11:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP5:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SVAR]], i64* [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SFVAR]], i64* [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[G1_ADDR]] to double* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[SFVAR_ADDR]] to float* -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_ADDR]] to double* -// CHECK1-NEXT: store double* [[CONV]], double** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load double, double* [[TMP5]], align 8 +// CHECK1-NEXT: store double [[TMP6]], double* [[G1]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load float, float* [[TMP9]], align 4 +// CHECK1-NEXT: store float [[TMP10]], float* [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load double, double* [[TMP11]], align 8 +// CHECK1-NEXT: store double [[TMP12]], double* [[G]], align 8 +// CHECK1-NEXT: store double* [[G1]], double** [[TMP]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: store double* [[G18]], double** [[_TMP9]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[G14]], double** [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP11:%.*]] = load double*, double** [[_TMP9]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP11]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: store i32 3, i32* [[SVAR10]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: store float 4.000000e+00, float* [[SFVAR11]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double* [[G7]], double** [[TMP12]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP14:%.*]] = load double*, double** [[_TMP9]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: store double* [[TMP14]], double** [[TMP13]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store i32* [[SVAR10]], i32** [[TMP15]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store float* [[SFVAR11]], float** [[TMP16]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !8 +// CHECK1-NEXT: [[TMP24:%.*]] = load double*, double** [[_TMP5]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP24]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: store i32 3, i32* [[SVAR6]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: store float 4.000000e+00, float* [[SFVAR7]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store double* [[G3]], double** [[TMP25]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load double*, double** [[_TMP5]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: store double* [[TMP27]], double** [[TMP26]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[SVAR6]], i32** [[TMP28]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[SFVAR7]], float** [[TMP29]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK1-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP22:%.*]] = load double, double* [[G7]], align 8 -// CHECK1-NEXT: store volatile double [[TMP22]], double* [[CONV3]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load double*, double** [[_TMP9]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load double, double* [[TMP23]], align 8 -// CHECK1-NEXT: store volatile double [[TMP24]], double* [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[SVAR10]], align 4 -// CHECK1-NEXT: store i32 [[TMP25]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load float, float* [[SFVAR11]], align 4 -// CHECK1-NEXT: store float [[TMP26]], float* [[CONV2]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load double, double* [[G3]], align 8 +// CHECK1-NEXT: store volatile double [[TMP35]], double* [[G]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load double*, double** [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load double, double* [[TMP36]], align 8 +// CHECK1-NEXT: store volatile double [[TMP37]], double* [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[SVAR6]], align 4 +// CHECK1-NEXT: store i32 [[TMP38]], i32* [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load float, float* [[SFVAR7]], align 4 +// CHECK1-NEXT: store float [[TMP39]], float* [[SFVAR]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -514,8 +528,7 @@ // CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[G_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], i32* [[SVAR_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SFVAR]], i32* [[SFVAR_ADDR]], align 4 @@ -524,27 +537,29 @@ // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[SFVAR_ADDR]] to float* // CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[G_ADDR]], align 4 // CHECK3-NEXT: store double* [[TMP0]], double** [[TMP]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load float, float* [[CONV]], align 4 -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[SFVAR_CASTED]] to float* -// CHECK3-NEXT: store float [[TMP5]], float* [[CONV1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[SFVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, double*, i32, i32, double*)* @.omp_outlined. to void (i32*, i32*, ...)*), double* [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], double* [[TMP1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load double*, double** [[TMP]], align 4 +// CHECK3-NEXT: store double* [[TMP3]], double** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[SVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP7:%.*]] = load float, float* [[CONV]], align 4 +// CHECK3-NEXT: store float [[TMP7]], float* [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store double* [[TMP1]], double** [[TMP8]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], i32 noundef [[SVAR:%.*]], i32 noundef [[SFVAR:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -552,111 +567,127 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[SVAR3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR4:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SVAR]], i32* [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SFVAR]], i32* [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[SFVAR_ADDR]] to float* -// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[G_ADDR]], align 4 -// CHECK3-NEXT: store double* [[TMP0]], double** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load float, float* [[TMP5]], align 4 +// CHECK3-NEXT: store float [[TMP6]], float* [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double*, double** [[TMP7]], align 4 +// CHECK3-NEXT: store double* [[TMP2]], double** [[TMP]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP]], align 4 -// CHECK3-NEXT: store double* [[G13]], double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load double*, double** [[TMP]], align 4 +// CHECK3-NEXT: store double* [[G1]], double** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP12:%.*]] = load double*, double** [[_TMP4]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[SVAR5]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: store i32 [[TMP13]], i32* [[SVAR_CASTED]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP15:%.*]] = load float, float* [[SFVAR6]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[CONV8:%.*]] = bitcast i32* [[SFVAR_CASTED]] to float* -// CHECK3-NEXT: store float [[TMP15]], float* [[CONV8]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[SFVAR_CASTED]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, double*, i32, i32, double*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP10]], i32 [[TMP11]], double* [[TMP12]], i32 [[TMP14]], i32 [[TMP16]], double* [[G2]]), !llvm.access.group !5 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[TMP19]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP24:%.*]] = load double*, double** [[_TMP2]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store double* [[TMP24]], double** [[TMP23]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[SVAR3]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store i32 [[TMP26]], i32* [[TMP25]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load float, float* [[SFVAR4]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store float [[TMP28]], float* [[TMP27]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store double* [[G]], double** [[TMP29]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !5 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK3-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP23:%.*]] = load double, double* [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP23]], double* [[TMP1]], align 8 -// CHECK3-NEXT: [[TMP24:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load double, double* [[TMP24]], align 4 -// CHECK3-NEXT: store volatile double [[TMP25]], double* [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP26]], i32* [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load float, float* [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP27]], float* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load double, double* [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP36]], double* [[TMP8]], align 8 +// CHECK3-NEXT: [[TMP37:%.*]] = load double*, double** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load double, double* [[TMP37]], align 4 +// CHECK3-NEXT: store volatile double [[TMP38]], double* [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[SVAR3]], align 4 +// CHECK3-NEXT: store i32 [[TMP39]], i32* [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = load float, float* [[SFVAR4]], align 4 +// CHECK3-NEXT: store float [[TMP40]], float* [[SFVAR]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], i32 noundef [[SVAR:%.*]], i32 noundef [[SFVAR:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -664,108 +695,117 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[SVAR3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR4:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SVAR]], i32* [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SFVAR]], i32* [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[SFVAR_ADDR]] to float* -// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[G_ADDR]], align 4 -// CHECK3-NEXT: store double* [[TMP0]], double** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load double*, double** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load float, float* [[TMP9]], align 4 +// CHECK3-NEXT: store float [[TMP10]], float* [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load double*, double** [[TMP11]], align 4 +// CHECK3-NEXT: store double* [[TMP6]], double** [[TMP]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP]], align 4 -// CHECK3-NEXT: store double* [[G13]], double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load double*, double** [[TMP]], align 4 +// CHECK3-NEXT: store double* [[G1]], double** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP13:%.*]] = load double*, double** [[_TMP4]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP13]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: store i32 3, i32* [[SVAR5]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: store float 4.000000e+00, float* [[SFVAR6]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double* [[G2]], double** [[TMP14]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP16:%.*]] = load double*, double** [[_TMP4]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: store double* [[TMP16]], double** [[TMP15]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store i32* [[SVAR5]], i32** [[TMP17]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store float* [[SFVAR6]], float** [[TMP18]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !9 +// CHECK3-NEXT: [[TMP24:%.*]] = load double*, double** [[_TMP2]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP24]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store i32 3, i32* [[SVAR3]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store float 4.000000e+00, float* [[SFVAR4]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G]], double** [[TMP25]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load double*, double** [[_TMP2]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store double* [[TMP27]], double** [[TMP26]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SVAR3]], i32** [[TMP28]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[SFVAR4]], float** [[TMP29]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK3-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK3-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP24:%.*]] = load double, double* [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP24]], double* [[TMP1]], align 8 -// CHECK3-NEXT: [[TMP25:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load double, double* [[TMP25]], align 4 -// CHECK3-NEXT: store volatile double [[TMP26]], double* [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP27]], i32* [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load float, float* [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP28]], float* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load double, double* [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP35]], double* [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP36:%.*]] = load double*, double** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load double, double* [[TMP36]], align 4 +// CHECK3-NEXT: store volatile double [[TMP37]], double* [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[SVAR3]], align 4 +// CHECK3-NEXT: store i32 [[TMP38]], i32* [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = load float, float* [[SFVAR4]], align 4 +// CHECK3-NEXT: store float [[TMP39]], float* [[SFVAR]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -918,8 +958,7 @@ // CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 // CHECK5-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 -// CHECK5-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK5-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK5-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK5-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 @@ -931,63 +970,70 @@ // CHECK5-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK5-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[CONV3]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i64 [[TMP4]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP5]], i64 [[TMP7]]) +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP6]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK5-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK5-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK5-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK5-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK5-NEXT: [[_TMP7:%.*]] = alloca %struct.S*, align 8 -// CHECK5-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK5-NEXT: [[_TMP3:%.*]] = alloca %struct.S*, align 8 +// CHECK5-NEXT: [[SVAR4:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[SVAR]], i64* [[SVAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK5-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* -// CHECK5-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 +// CHECK5-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK5-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 8 +// CHECK5-NEXT: store i32 [[TMP10]], i32* [[SVAR]], align 4 +// CHECK5-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: @@ -997,167 +1043,185 @@ // CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK5-NEXT: store %struct.S* [[VAR6]], %struct.S** [[_TMP7]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK5-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP3]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK5-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK5-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 -// CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK5-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !5 -// CHECK5-NEXT: [[CONV10:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP15]], i32* [[CONV10]], align 4, !llvm.access.group !5 -// CHECK5-NEXT: [[TMP16:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !5 -// CHECK5-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 8, !llvm.access.group !5 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[SVAR8]], align 4, !llvm.access.group !5 -// CHECK5-NEXT: [[CONV11:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP18]], i32* [[CONV11]], align 4, !llvm.access.group !5 -// CHECK5-NEXT: [[TMP19:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8, !llvm.access.group !5 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP12]], i64 [[TMP14]], [2 x i32]* [[VEC4]], i64 [[TMP16]], [2 x %struct.S]* [[S_ARR5]], %struct.S* [[TMP17]], i64 [[TMP19]]), !llvm.access.group !5 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 +// CHECK5-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK5-NEXT: store i64 [[TMP20]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !5 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK5-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK5-NEXT: store i64 [[TMP22]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !5 +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !5 +// CHECK5-NEXT: store i64 [[TMP24]], i64* [[TMP23]], align 8, !llvm.access.group !5 +// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !5 +// CHECK5-NEXT: store i64 [[TMP26]], i64* [[TMP25]], align 8, !llvm.access.group !5 +// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP27]], align 8, !llvm.access.group !5 +// CHECK5-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !5 +// CHECK5-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 8, !llvm.access.group !5 +// CHECK5-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK5-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP30]], align 8, !llvm.access.group !5 +// CHECK5-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK5-NEXT: [[TMP32:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 8, !llvm.access.group !5 +// CHECK5-NEXT: store %struct.S* [[TMP32]], %struct.S** [[TMP31]], align 8, !llvm.access.group !5 +// CHECK5-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK5-NEXT: [[TMP34:%.*]] = load i32, i32* [[SVAR4]], align 4, !llvm.access.group !5 +// CHECK5-NEXT: store i32 [[TMP34]], i32* [[TMP33]], align 8, !llvm.access.group !5 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !5 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK5-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK5-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK5-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) +// CHECK5-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK5-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 2, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: -// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK5-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK5-NEXT: br i1 [[TMP42]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK5-NEXT: store i32 [[TMP28]], i32* [[CONV]], align 4 -// CHECK5-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK5-NEXT: [[TMP30:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP29]], i8* align 4 [[TMP30]], i64 8, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP1]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP31:%.*]] = bitcast [2 x %struct.S]* [[S_ARR5]] to %struct.S* -// CHECK5-NEXT: [[TMP32:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN12]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN12]], [[TMP32]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP43:%.*]] = load i32, i32* [[T_VAR2]], align 4 +// CHECK5-NEXT: store i32 [[TMP43]], i32* [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP44:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK5-NEXT: [[TMP45:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP44]], i8* align 4 [[TMP45]], i64 8, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP46:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK5-NEXT: [[TMP47:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN6]], [[TMP47]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP31]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK5-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i64 4, i1 false) +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP46]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[TMP48:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK5-NEXT: [[TMP49:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP48]], i8* align 4 [[TMP49]], i64 4, i1 false) // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP32]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done13: -// CHECK5-NEXT: [[TMP35:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 8 -// CHECK5-NEXT: [[TMP36:%.*]] = bitcast %struct.S* [[TMP3]] to i8* -// CHECK5-NEXT: [[TMP37:%.*]] = bitcast %struct.S* [[TMP35]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP36]], i8* align 4 [[TMP37]], i64 4, i1 false) -// CHECK5-NEXT: [[TMP38:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK5-NEXT: store i32 [[TMP38]], i32* [[CONV1]], align 4 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP47]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done7: +// CHECK5-NEXT: [[TMP50:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 8 +// CHECK5-NEXT: [[TMP51:%.*]] = bitcast %struct.S* [[TMP11]] to i8* +// CHECK5-NEXT: [[TMP52:%.*]] = bitcast %struct.S* [[TMP50]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP51]], i8* align 4 [[TMP52]], i64 4, i1 false) +// CHECK5-NEXT: [[TMP53:%.*]] = load i32, i32* [[SVAR4]], align 4 +// CHECK5-NEXT: store i32 [[TMP53]], i32* [[SVAR]], align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN14]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK5-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP39]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP54]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done15: +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done9: // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK5-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[T_VAR5:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[VEC6:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR7:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK5-NEXT: [[VAR8:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK5-NEXT: [[_TMP9:%.*]] = alloca %struct.S*, align 8 -// CHECK5-NEXT: [[SVAR10:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK5-NEXT: [[_TMP4:%.*]] = alloca %struct.S*, align 8 +// CHECK5-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[SVAR]], i64* [[SVAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK5-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* -// CHECK5-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 +// CHECK5-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK5-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP10:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK5-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[TMP11]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 7 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 8 +// CHECK5-NEXT: store i32 [[TMP14]], i32* [[SVAR]], align 4 +// CHECK5-NEXT: store %struct.S* [[TMP12]], %struct.S** [[TMP]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK5-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK5-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP16]] to i32 +// CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR7]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: @@ -1167,115 +1231,115 @@ // CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR8]]) -// CHECK5-NEXT: store %struct.S* [[VAR8]], %struct.S** [[_TMP9]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK5-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR5]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC6]], i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[TMP16:%.*]] = load %struct.S*, %struct.S** [[_TMP9]], align 8, !llvm.access.group !9 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK5-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR7]], i64 0, i64 [[IDXPROM12]] -// CHECK5-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX13]] to i8* -// CHECK5-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[TMP16]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false), !llvm.access.group !9 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK5-NEXT: store i32 [[TMP26]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP28:%.*]] = load %struct.S*, %struct.S** [[_TMP4]], align 8, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK5-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM7]] +// CHECK5-NEXT: [[TMP30:%.*]] = bitcast %struct.S* [[ARRAYIDX8]] to i8* +// CHECK5-NEXT: [[TMP31:%.*]] = bitcast %struct.S* [[TMP28]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i64 4, i1 false), !llvm.access.group !9 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK5-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK5-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK5-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK5-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 2, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK5-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK5-NEXT: br i1 [[TMP38]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR5]], align 4 -// CHECK5-NEXT: store i32 [[TMP27]], i32* [[CONV]], align 4 -// CHECK5-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK5-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[VEC6]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i64 8, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN15:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP1]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP30:%.*]] = bitcast [2 x %struct.S]* [[S_ARR7]] to %struct.S* -// CHECK5-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN15]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN15]], [[TMP31]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE16:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP39:%.*]] = load i32, i32* [[T_VAR3]], align 4 +// CHECK5-NEXT: store i32 [[TMP39]], i32* [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP40:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK5-NEXT: [[TMP41:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP40]], i8* align 4 [[TMP41]], i64 8, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP10]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP42:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK5-NEXT: [[TMP43:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN10]], [[TMP43]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP30]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN15]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK5-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i64 4, i1 false) +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP42]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[TMP44:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK5-NEXT: [[TMP45:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP44]], i8* align 4 [[TMP45]], i64 4, i1 false) // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP31]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done16: -// CHECK5-NEXT: [[TMP34:%.*]] = load %struct.S*, %struct.S** [[_TMP9]], align 8 -// CHECK5-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[TMP5]] to i8* -// CHECK5-NEXT: [[TMP36:%.*]] = bitcast %struct.S* [[TMP34]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i64 4, i1 false) -// CHECK5-NEXT: [[TMP37:%.*]] = load i32, i32* [[SVAR10]], align 4 -// CHECK5-NEXT: store i32 [[TMP37]], i32* [[CONV1]], align 4 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP43]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done11: +// CHECK5-NEXT: [[TMP46:%.*]] = load %struct.S*, %struct.S** [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP47:%.*]] = bitcast %struct.S* [[TMP17]] to i8* +// CHECK5-NEXT: [[TMP48:%.*]] = bitcast %struct.S* [[TMP46]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP47]], i8* align 4 [[TMP48]], i64 4, i1 false) +// CHECK5-NEXT: [[TMP49:%.*]] = load i32, i32* [[SVAR5]], align 4 +// CHECK5-NEXT: store i32 [[TMP49]], i32* [[SVAR]], align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR8]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAY_BEGIN17:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR7]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN17]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK5-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN12]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP38]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP50]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN17]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE18:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done18: +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done13: // CHECK5-NEXT: ret void // // @@ -1293,33 +1357,33 @@ // CHECK5-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK5-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK5-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK5-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK5-NEXT: [[VAR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK5-NEXT: [[TMP:%.*]] = alloca %struct.S.2*, align 8 // CHECK5-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x i8*], align 8 // CHECK5-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x i8*], align 8 // CHECK5-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x i8*], align 8 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK5-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK5-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) -// CHECK5-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK5-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK5-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK5-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) -// CHECK5-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR]], align 8 -// CHECK5-NEXT: store %struct.S.0* [[TMP1]], %struct.S.0** [[TMP]], align 8 +// CHECK5-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i64 0, i64 0 +// CHECK5-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) +// CHECK5-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK5-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) +// CHECK5-NEXT: store %struct.S.2* [[TEST]], %struct.S.2** [[VAR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = load %struct.S.2*, %struct.S.2** [[VAR]], align 8 +// CHECK5-NEXT: store %struct.S.2* [[TMP1]], %struct.S.2** [[TMP]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_VAR]], align 4 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP2]], i32* [[CONV]], align 4 // CHECK5-NEXT: [[TMP3:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP]], align 8 // CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP6:%.*]] = bitcast i8** [[TMP5]] to [2 x i32]** // CHECK5-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP6]], align 8 @@ -1337,19 +1401,19 @@ // CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 // CHECK5-NEXT: store i8* null, i8** [[TMP14]], align 8 // CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK5-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to [2 x %struct.S.0]** -// CHECK5-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to [2 x %struct.S.2]** +// CHECK5-NEXT: store [2 x %struct.S.2]* [[S_ARR]], [2 x %struct.S.2]** [[TMP16]], align 8 // CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK5-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.0]** -// CHECK5-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP18]], align 8 +// CHECK5-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.2]** +// CHECK5-NEXT: store [2 x %struct.S.2]* [[S_ARR]], [2 x %struct.S.2]** [[TMP18]], align 8 // CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 // CHECK5-NEXT: store i8* null, i8** [[TMP19]], align 8 // CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK5-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to %struct.S.0** -// CHECK5-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[TMP21]], align 8 +// CHECK5-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to %struct.S.2** +// CHECK5-NEXT: store %struct.S.2* [[TMP4]], %struct.S.2** [[TMP21]], align 8 // CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK5-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.0** -// CHECK5-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[TMP23]], align 8 +// CHECK5-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.2** +// CHECK5-NEXT: store %struct.S.2* [[TMP4]], %struct.S.2** [[TMP23]], align 8 // CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 // CHECK5-NEXT: store i8* null, i8** [[TMP24]], align 8 // CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 @@ -1359,21 +1423,21 @@ // CHECK5-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 // CHECK5-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK5: omp_offload.failed: -// CHECK5-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l50([2 x i32]* [[VEC]], i64 [[TMP3]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP4]]) #[[ATTR4]] +// CHECK5-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l50([2 x i32]* [[VEC]], i64 [[TMP3]], [2 x %struct.S.2]* [[S_ARR]], %struct.S.2* [[TMP4]]) #[[ATTR4]] // CHECK5-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK5: omp_offload.cont: // CHECK5-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP29]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP29]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK5: arraydestroy.done2: -// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK5-NEXT: [[TMP30:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK5-NEXT: ret i32 [[TMP30]] // @@ -1413,414 +1477,440 @@ // // // CHECK5-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK5-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK5-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK5-NEXT: entry: -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK5-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK5-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK5-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK5-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK5-NEXT: entry: -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 // CHECK5-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK5-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) +// CHECK5-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l50 -// CHECK5-SAME: ([2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: ([2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.2]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 // CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK5-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 -// CHECK5-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.2]*, align 8 +// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK5-NEXT: [[TMP:%.*]] = alloca %struct.S.2*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK5-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK5-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 +// CHECK5-NEXT: store [2 x %struct.S.2]* [[S_ARR]], [2 x %struct.S.2]** [[S_ARR_ADDR]], align 8 +// CHECK5-NEXT: store %struct.S.2* [[VAR]], %struct.S.2** [[VAR_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK5-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i64 [[TMP4]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP5]]) +// CHECK5-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.2]*, [2 x %struct.S.2]** [[S_ARR_ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.S.2*, %struct.S.2** [[VAR_ADDR]], align 8 +// CHECK5-NEXT: store %struct.S.2* [[TMP2]], %struct.S.2** [[TMP]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store [2 x %struct.S.2]* [[TMP1]], [2 x %struct.S.2]** [[TMP6]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP]], align 8 +// CHECK5-NEXT: store %struct.S.2* [[TMP8]], %struct.S.2** [[TMP7]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK5-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[TMP:%.*]] = alloca %struct.S.2*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK5-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK5-NEXT: [[_TMP6:%.*]] = alloca %struct.S.0*, align 8 +// CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK5-NEXT: [[_TMP3:%.*]] = alloca %struct.S.2*, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK5-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 +// CHECK5-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK5-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.2]*, [2 x %struct.S.2]** [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP7]], align 8 +// CHECK5-NEXT: store %struct.S.2* [[TMP8]], %struct.S.2** [[TMP]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: -// CHECK5-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK5-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.2* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYCTOR_CUR]], i64 1 +// CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK5-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP6]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK5-NEXT: [[TMP9:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP]], align 8 +// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: store %struct.S.2* [[VAR]], %struct.S.2** [[_TMP3]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 -// CHECK5-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK5-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 -// CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 -// CHECK5-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !14 -// CHECK5-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP15]], i32* [[CONV8]], align 4, !llvm.access.group !14 -// CHECK5-NEXT: [[TMP16:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !14 -// CHECK5-NEXT: [[TMP17:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 8, !llvm.access.group !14 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP12]], i64 [[TMP14]], [2 x i32]* [[VEC3]], i64 [[TMP16]], [2 x %struct.S.0]* [[S_ARR4]], %struct.S.0* [[TMP17]]), !llvm.access.group !14 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 +// CHECK5-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// CHECK5-NEXT: store i64 [[TMP18]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !14 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK5-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK5-NEXT: store i64 [[TMP20]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !14 +// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !14 +// CHECK5-NEXT: store i64 [[TMP22]], i64* [[TMP21]], align 8, !llvm.access.group !14 +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !14 +// CHECK5-NEXT: store i64 [[TMP24]], i64* [[TMP23]], align 8, !llvm.access.group !14 +// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP25]], align 8, !llvm.access.group !14 +// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !14 +// CHECK5-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 8, !llvm.access.group !14 +// CHECK5-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: store [2 x %struct.S.2]* [[S_ARR]], [2 x %struct.S.2]** [[TMP28]], align 8, !llvm.access.group !14 +// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP30:%.*]] = load %struct.S.2*, %struct.S.2** [[_TMP3]], align 8, !llvm.access.group !14 +// CHECK5-NEXT: store %struct.S.2* [[TMP30]], %struct.S.2** [[TMP29]], align 8, !llvm.access.group !14 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !14 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK5-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK5-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK5-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK5-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK5-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 2, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK5-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK5-NEXT: br i1 [[TMP38]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK5-NEXT: store i32 [[TMP26]], i32* [[CONV]], align 4 -// CHECK5-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK5-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP27]], i8* align 4 [[TMP28]], i64 8, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP1]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP29:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR4]] to %struct.S.0* -// CHECK5-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN9]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN9]], [[TMP30]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP39:%.*]] = load i32, i32* [[T_VAR2]], align 4 +// CHECK5-NEXT: store i32 [[TMP39]], i32* [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP40:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK5-NEXT: [[TMP41:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP40]], i8* align 4 [[TMP41]], i64 8, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP42:%.*]] = bitcast [2 x %struct.S.2]* [[S_ARR]] to %struct.S.2* +// CHECK5-NEXT: [[TMP43:%.*]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN5]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.2* [[ARRAY_BEGIN5]], [[TMP43]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP29]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[TMP31:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK5-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP31]], i8* align 4 [[TMP32]], i64 4, i1 false) -// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP30]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done10: -// CHECK5-NEXT: [[TMP33:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 8 -// CHECK5-NEXT: [[TMP34:%.*]] = bitcast %struct.S.0* [[TMP3]] to i8* -// CHECK5-NEXT: [[TMP35:%.*]] = bitcast %struct.S.0* [[TMP33]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i64 4, i1 false) +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP42]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.2* [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[TMP44:%.*]] = bitcast %struct.S.2* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK5-NEXT: [[TMP45:%.*]] = bitcast %struct.S.2* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP44]], i8* align 4 [[TMP45]], i64 4, i1 false) +// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.2* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP43]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done6: +// CHECK5-NEXT: [[TMP46:%.*]] = load %struct.S.2*, %struct.S.2** [[_TMP3]], align 8 +// CHECK5-NEXT: [[TMP47:%.*]] = bitcast %struct.S.2* [[TMP9]] to i8* +// CHECK5-NEXT: [[TMP48:%.*]] = bitcast %struct.S.2* [[TMP46]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP47]], i8* align 4 [[TMP48]], i64 4, i1 false) // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN11]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK5-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN7]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP36]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done12: +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP49]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done8: // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK5-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[TMP:%.*]] = alloca %struct.S.2*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[T_VAR4:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[VEC5:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR6:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK5-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK5-NEXT: [[_TMP8:%.*]] = alloca %struct.S.0*, align 8 +// CHECK5-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK5-NEXT: [[_TMP4:%.*]] = alloca %struct.S.2*, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK5-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 +// CHECK5-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK5-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load [2 x %struct.S.2]*, [2 x %struct.S.2]** [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP12:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP11]], align 8 +// CHECK5-NEXT: store %struct.S.2* [[TMP12]], %struct.S.2** [[TMP]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK5-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK5-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR6]], i32 0, i32 0 -// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: -// CHECK5-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK5-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.2* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYCTOR_CUR]], i64 1 +// CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR7]]) -// CHECK5-NEXT: store %struct.S.0* [[VAR7]], %struct.S.0** [[_TMP8]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP]], align 8 +// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: store %struct.S.2* [[VAR]], %struct.S.2** [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 -// CHECK5-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK5-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR4]], align 4, !llvm.access.group !17 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC5]], i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 -// CHECK5-NEXT: [[TMP16:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 8, !llvm.access.group !17 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 -// CHECK5-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK5-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR6]], i64 0, i64 [[IDXPROM10]] -// CHECK5-NEXT: [[TMP18:%.*]] = bitcast %struct.S.0* [[ARRAYIDX11]] to i8* -// CHECK5-NEXT: [[TMP19:%.*]] = bitcast %struct.S.0* [[TMP16]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false), !llvm.access.group !17 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !17 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK5-NEXT: store i32 [[TMP24]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK5-NEXT: [[TMP26:%.*]] = load %struct.S.2*, %struct.S.2** [[_TMP4]], align 8, !llvm.access.group !17 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 +// CHECK5-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK5-NEXT: [[TMP28:%.*]] = bitcast %struct.S.2* [[ARRAYIDX7]] to i8* +// CHECK5-NEXT: [[TMP29:%.*]] = bitcast %struct.S.2* [[TMP26]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i64 4, i1 false), !llvm.access.group !17 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK5-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK5-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK5-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK5-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK5-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 2, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK5-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK5-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR4]], align 4 -// CHECK5-NEXT: store i32 [[TMP27]], i32* [[CONV]], align 4 -// CHECK5-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK5-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[VEC5]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i64 8, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP1]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP30:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR6]] to %struct.S.0* -// CHECK5-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN13]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN13]], [[TMP31]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP37:%.*]] = load i32, i32* [[T_VAR3]], align 4 +// CHECK5-NEXT: store i32 [[TMP37]], i32* [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP38:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK5-NEXT: [[TMP39:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i64 8, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[TMP10]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP40:%.*]] = bitcast [2 x %struct.S.2]* [[S_ARR]] to %struct.S.2* +// CHECK5-NEXT: [[TMP41:%.*]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN9]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.2* [[ARRAY_BEGIN9]], [[TMP41]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP30]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN13]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK5-NEXT: [[TMP33:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i64 4, i1 false) -// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP31]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done14: -// CHECK5-NEXT: [[TMP34:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 8 -// CHECK5-NEXT: [[TMP35:%.*]] = bitcast %struct.S.0* [[TMP5]] to i8* -// CHECK5-NEXT: [[TMP36:%.*]] = bitcast %struct.S.0* [[TMP34]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i64 4, i1 false) +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP40]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.2* [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[TMP42:%.*]] = bitcast %struct.S.2* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK5-NEXT: [[TMP43:%.*]] = bitcast %struct.S.2* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP42]], i8* align 4 [[TMP43]], i64 4, i1 false) +// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.2* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP41]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done10: +// CHECK5-NEXT: [[TMP44:%.*]] = load %struct.S.2*, %struct.S.2** [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP45:%.*]] = bitcast %struct.S.2* [[TMP15]] to i8* +// CHECK5-NEXT: [[TMP46:%.*]] = bitcast %struct.S.2* [[TMP44]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP45]], i8* align 4 [[TMP46]], i64 4, i1 false) // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAY_BEGIN15:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR6]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN15]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK5-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN11]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP37]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN15]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE16:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done16: +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP47]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done12: // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK5-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK5-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK5-NEXT: entry: -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK5-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK5-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: call void @_ZN1SIiED2Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK5-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK5-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK5-NEXT: entry: -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK5-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK5-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], %struct.S.2* [[THIS1]], i32 0, i32 0 // CHECK5-NEXT: store i32 0, i32* [[F]], align 4 // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK5-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK5-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK5-NEXT: entry: -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 // CHECK5-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], %struct.S.2* [[THIS1]], i32 0, i32 0 // CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK5-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK5-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK5-NEXT: entry: -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK5-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK5-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 // CHECK5-NEXT: ret void // // @@ -1969,8 +2059,7 @@ // CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 // CHECK7-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 -// CHECK7-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK7-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK7-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK7-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 @@ -1980,27 +2069,31 @@ // CHECK7-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 // CHECK7-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP3]], i32* [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[SVAR_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP6]], i32* [[SVAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32 [[TMP4]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP5]], i32 [[TMP7]]) +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP6]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK7-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[SVAR_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK7-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK7-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK7-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2008,31 +2101,38 @@ // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK7-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK7-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK7-NEXT: [[_TMP6:%.*]] = alloca %struct.S*, align 4 -// CHECK7-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK7-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK7-NEXT: [[_TMP3:%.*]] = alloca %struct.S*, align 4 +// CHECK7-NEXT: [[SVAR4:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[SVAR]], i32* [[SVAR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 4 +// CHECK7-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], i32* [[SVAR]], align 4 +// CHECK7-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 // CHECK7-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK7: arrayctor.loop: @@ -2042,123 +2142,135 @@ // CHECK7-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK7-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK7: arrayctor.cont: -// CHECK7-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK7-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK7-NEXT: store %struct.S* [[VAR5]], %struct.S** [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK7-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK7-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK7-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK7-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK7-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK7: omp.inner.for.cond.cleanup: // CHECK7-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !6 -// CHECK7-NEXT: store i32 [[TMP13]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 -// CHECK7-NEXT: [[TMP15:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4, !llvm.access.group !6 -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[SVAR7]], align 4, !llvm.access.group !6 -// CHECK7-NEXT: store i32 [[TMP16]], i32* [[SVAR_CASTED]], align 4, !llvm.access.group !6 -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4, !llvm.access.group !6 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP11]], i32 [[TMP12]], [2 x i32]* [[VEC3]], i32 [[TMP14]], [2 x %struct.S]* [[S_ARR4]], %struct.S* [[TMP15]], i32 [[TMP17]]), !llvm.access.group !6 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: store i32 [[TMP24]], i32* [[TMP23]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP25]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK7-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP28]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP30:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: store %struct.S* [[TMP30]], %struct.S** [[TMP29]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK7-NEXT: [[TMP32:%.*]] = load i32, i32* [[SVAR4]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: store i32 [[TMP32]], i32* [[TMP31]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !6 // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK7-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK7-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK7-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK7-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK7-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 2, i32* [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK7: .omp.final.done: -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK7-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK7-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK7-NEXT: br i1 [[TMP40]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK7: .omp.lastprivate.then: -// CHECK7-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK7-NEXT: store i32 [[TMP26]], i32* [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK7-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP27]], i8* align 4 [[TMP28]], i32 8, i1 false) -// CHECK7-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP1]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP29:%.*]] = bitcast [2 x %struct.S]* [[S_ARR4]] to %struct.S* -// CHECK7-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN9]], i32 2 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN9]], [[TMP30]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK7-NEXT: [[TMP41:%.*]] = load i32, i32* [[T_VAR2]], align 4 +// CHECK7-NEXT: store i32 [[TMP41]], i32* [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP42:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK7-NEXT: [[TMP43:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP42]], i8* align 4 [[TMP43]], i32 8, i1 false) +// CHECK7-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP44:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK7-NEXT: [[TMP45:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i32 2 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN6]], [[TMP45]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP29]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[TMP31:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK7-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP31]], i8* align 4 [[TMP32]], i32 4, i1 false) +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP44]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[TMP46:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK7-NEXT: [[TMP47:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP46]], i8* align 4 [[TMP47]], i32 4, i1 false) // CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP30]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK7: omp.arraycpy.done10: -// CHECK7-NEXT: [[TMP33:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[TMP3]] to i8* -// CHECK7-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[TMP33]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i32 4, i1 false) -// CHECK7-NEXT: [[TMP36:%.*]] = load i32, i32* [[SVAR7]], align 4 -// CHECK7-NEXT: store i32 [[TMP36]], i32* [[SVAR_ADDR]], align 4 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP45]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK7: omp.arraycpy.done7: +// CHECK7-NEXT: [[TMP48:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP49:%.*]] = bitcast %struct.S* [[TMP11]] to i8* +// CHECK7-NEXT: [[TMP50:%.*]] = bitcast %struct.S* [[TMP48]] to i8* +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP49]], i8* align 4 [[TMP50]], i32 4, i1 false) +// CHECK7-NEXT: [[TMP51:%.*]] = load i32, i32* [[SVAR4]], align 4 +// CHECK7-NEXT: store i32 [[TMP51]], i32* [[SVAR]], align 4 // CHECK7-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK7: .omp.lastprivate.done: -// CHECK7-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN11]], i32 2 +// CHECK7-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK7-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i32 2 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP37]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP52]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK7-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK7: arraydestroy.done12: +// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK7: arraydestroy.done9: // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK7-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK7-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2167,34 +2279,44 @@ // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK7-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK7-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK7-NEXT: [[_TMP6:%.*]] = alloca %struct.S*, align 4 -// CHECK7-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK7-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK7-NEXT: [[_TMP3:%.*]] = alloca %struct.S*, align 4 +// CHECK7-NEXT: [[SVAR4:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[SVAR]], i32* [[SVAR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 4 +// CHECK7-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[TMP11]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], i32* [[SVAR]], align 4 +// CHECK7-NEXT: store %struct.S* [[TMP12]], %struct.S** [[TMP]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 // CHECK7-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK7: arrayctor.loop: @@ -2204,113 +2326,113 @@ // CHECK7-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK7-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK7: arrayctor.cont: -// CHECK7-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK7-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK7-NEXT: store %struct.S* [[VAR5]], %struct.S** [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK7-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK7-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK7-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK7-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK7-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK7-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK7: omp.inner.for.cond.cleanup: // CHECK7-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !10 -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC3]], i32 0, i32 [[TMP15]] -// CHECK7-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 -// CHECK7-NEXT: [[TMP16:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4, !llvm.access.group !10 -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK7-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 [[TMP17]] -// CHECK7-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX9]] to i8* -// CHECK7-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[TMP16]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false), !llvm.access.group !10 +// CHECK7-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP27]] +// CHECK7-NEXT: store i32 [[TMP26]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP28:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP29]] +// CHECK7-NEXT: [[TMP30:%.*]] = bitcast %struct.S* [[ARRAYIDX6]] to i8* +// CHECK7-NEXT: [[TMP31:%.*]] = bitcast %struct.S* [[TMP28]] to i8* +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i32 4, i1 false), !llvm.access.group !10 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK7-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK7-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK7-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK7-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK7-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK7-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK7-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 2, i32* [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK7: .omp.final.done: -// CHECK7-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK7-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK7-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK7-NEXT: br i1 [[TMP38]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK7: .omp.lastprivate.then: -// CHECK7-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK7-NEXT: store i32 [[TMP27]], i32* [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK7-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 8, i1 false) -// CHECK7-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP1]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP30:%.*]] = bitcast [2 x %struct.S]* [[S_ARR4]] to %struct.S* -// CHECK7-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN11]], i32 2 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN11]], [[TMP31]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK7-NEXT: [[TMP39:%.*]] = load i32, i32* [[T_VAR2]], align 4 +// CHECK7-NEXT: store i32 [[TMP39]], i32* [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP40:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK7-NEXT: [[TMP41:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP40]], i8* align 4 [[TMP41]], i32 8, i1 false) +// CHECK7-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP10]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP42:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK7-NEXT: [[TMP43:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i32 2 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN8]], [[TMP43]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP30]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK7-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i32 4, i1 false) +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP42]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[TMP44:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK7-NEXT: [[TMP45:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP44]], i8* align 4 [[TMP45]], i32 4, i1 false) // CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP31]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK7: omp.arraycpy.done12: -// CHECK7-NEXT: [[TMP34:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[TMP5]] to i8* -// CHECK7-NEXT: [[TMP36:%.*]] = bitcast %struct.S* [[TMP34]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i32 4, i1 false) -// CHECK7-NEXT: [[TMP37:%.*]] = load i32, i32* [[SVAR7]], align 4 -// CHECK7-NEXT: store i32 [[TMP37]], i32* [[SVAR_ADDR]], align 4 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP43]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK7: omp.arraycpy.done9: +// CHECK7-NEXT: [[TMP46:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP47:%.*]] = bitcast %struct.S* [[TMP17]] to i8* +// CHECK7-NEXT: [[TMP48:%.*]] = bitcast %struct.S* [[TMP46]] to i8* +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP47]], i8* align 4 [[TMP48]], i32 4, i1 false) +// CHECK7-NEXT: [[TMP49:%.*]] = load i32, i32* [[SVAR4]], align 4 +// CHECK7-NEXT: store i32 [[TMP49]], i32* [[SVAR]], align 4 // CHECK7-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK7: .omp.lastprivate.done: -// CHECK7-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN13]], i32 2 +// CHECK7-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK7-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i32 2 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP38]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP50]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK7-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK7: arraydestroy.done14: +// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK7: arraydestroy.done11: // CHECK7-NEXT: ret void // // @@ -2328,32 +2450,32 @@ // CHECK7-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK7-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK7-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK7-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK7-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK7-NEXT: [[VAR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK7-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK7-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x i8*], align 4 // CHECK7-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x i8*], align 4 // CHECK7-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x i8*], align 4 // CHECK7-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK7-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i32 8, i1 false) -// CHECK7-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK7-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK7-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i32 1 -// CHECK7-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) -// CHECK7-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR]], align 4 -// CHECK7-NEXT: store %struct.S.0* [[TMP1]], %struct.S.0** [[TMP]], align 4 +// CHECK7-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK7-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i32 1 +// CHECK7-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK7-NEXT: store %struct.S.1* [[TEST]], %struct.S.1** [[VAR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR]], align 4 +// CHECK7-NEXT: store %struct.S.1* [[TMP1]], %struct.S.1** [[TMP]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_VAR]], align 4 // CHECK7-NEXT: store i32 [[TMP2]], i32* [[T_VAR_CASTED]], align 4 // CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 // CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK7-NEXT: [[TMP6:%.*]] = bitcast i8** [[TMP5]] to [2 x i32]** // CHECK7-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP6]], align 4 @@ -2371,19 +2493,19 @@ // CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 // CHECK7-NEXT: store i8* null, i8** [[TMP14]], align 4 // CHECK7-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to [2 x %struct.S.0]** -// CHECK7-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP16]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to [2 x %struct.S.1]** +// CHECK7-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP16]], align 4 // CHECK7-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.0]** -// CHECK7-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP18]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.1]** +// CHECK7-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP18]], align 4 // CHECK7-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 // CHECK7-NEXT: store i8* null, i8** [[TMP19]], align 4 // CHECK7-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK7-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to %struct.S.0** -// CHECK7-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[TMP21]], align 4 +// CHECK7-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to %struct.S.1** +// CHECK7-NEXT: store %struct.S.1* [[TMP4]], %struct.S.1** [[TMP21]], align 4 // CHECK7-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK7-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.0** -// CHECK7-NEXT: store %struct.S.0* [[TMP4]], %struct.S.0** [[TMP23]], align 4 +// CHECK7-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.1** +// CHECK7-NEXT: store %struct.S.1* [[TMP4]], %struct.S.1** [[TMP23]], align 4 // CHECK7-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 // CHECK7-NEXT: store i8* null, i8** [[TMP24]], align 4 // CHECK7-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 @@ -2393,21 +2515,21 @@ // CHECK7-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 // CHECK7-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK7: omp_offload.failed: -// CHECK7-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l50([2 x i32]* [[VEC]], i32 [[TMP3]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP4]]) #[[ATTR4]] +// CHECK7-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l50([2 x i32]* [[VEC]], i32 [[TMP3]], [2 x %struct.S.1]* [[S_ARR]], %struct.S.1* [[TMP4]]) #[[ATTR4]] // CHECK7-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK7: omp_offload.cont: // CHECK7-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP29]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK7-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP29]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK7-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK7: arraydestroy.done2: -// CHECK7-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK7-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK7-NEXT: [[TMP30:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK7-NEXT: ret i32 [[TMP30]] // @@ -2447,212 +2569,234 @@ // // // CHECK7-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK7-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK7-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK7-NEXT: entry: -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK7-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK7-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK7-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK7-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK7-NEXT: entry: -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK7-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK7-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l50 -// CHECK7-SAME: ([2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: ([2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.1]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 // CHECK7-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK7-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 -// CHECK7-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.1]*, align 4 +// CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK7-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK7-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK7-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 +// CHECK7-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[S_ARR_ADDR]], align 4 +// CHECK7-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[VAR_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP3]], i32* [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32 [[TMP4]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP5]]) +// CHECK7-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[S_ARR_ADDR]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR_ADDR]], align 4 +// CHECK7-NEXT: store %struct.S.1* [[TMP2]], %struct.S.1** [[TMP]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store [2 x %struct.S.1]* [[TMP1]], [2 x %struct.S.1]** [[TMP6]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK7-NEXT: store %struct.S.1* [[TMP8]], %struct.S.1** [[TMP7]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK7-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK7-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK7-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK7-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK7-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK7-NEXT: [[_TMP6:%.*]] = alloca %struct.S.0*, align 4 +// CHECK7-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK7-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK7-NEXT: [[_TMP3:%.*]] = alloca %struct.S.1*, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 +// CHECK7-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP7]], align 4 +// CHECK7-NEXT: store %struct.S.1* [[TMP8]], %struct.S.1** [[TMP]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK7-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK7: arrayctor.loop: -// CHECK7-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK7-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i32 1 -// CHECK7-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK7-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK7-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i32 1 +// CHECK7-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK7-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK7: arrayctor.cont: -// CHECK7-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK7-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK7-NEXT: [[TMP9:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK7-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK7-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK7-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK7-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK7: omp.inner.for.cond.cleanup: // CHECK7-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !15 -// CHECK7-NEXT: store i32 [[TMP13]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 -// CHECK7-NEXT: [[TMP15:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4, !llvm.access.group !15 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP11]], i32 [[TMP12]], [2 x i32]* [[VEC3]], i32 [[TMP14]], [2 x %struct.S.0]* [[S_ARR4]], %struct.S.0* [[TMP15]]), !llvm.access.group !15 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: store i32 [[TMP17]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: store i32 [[TMP18]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: store i32 [[TMP20]], i32* [[TMP19]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP23]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK7-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP26]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP28:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP3]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: store %struct.S.1* [[TMP28]], %struct.S.1** [[TMP27]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !15 // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK7-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK7-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK7-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 2, i32* [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK7: .omp.final.done: -// CHECK7-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK7-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK7-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK7-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK7: .omp.lastprivate.then: -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK7-NEXT: store i32 [[TMP24]], i32* [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK7-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP25]], i8* align 4 [[TMP26]], i32 8, i1 false) -// CHECK7-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP1]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP27:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR4]] to %struct.S.0* -// CHECK7-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN8]], i32 2 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN8]], [[TMP28]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK7-NEXT: [[TMP37:%.*]] = load i32, i32* [[T_VAR2]], align 4 +// CHECK7-NEXT: store i32 [[TMP37]], i32* [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP38:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK7-NEXT: [[TMP39:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i32 8, i1 false) +// CHECK7-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP40:%.*]] = bitcast [2 x %struct.S.1]* [[S_ARR]] to %struct.S.1* +// CHECK7-NEXT: [[TMP41:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN5]], i32 2 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN5]], [[TMP41]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP27]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[TMP29:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK7-NEXT: [[TMP30:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP29]], i8* align 4 [[TMP30]], i32 4, i1 false) -// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP28]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] -// CHECK7: omp.arraycpy.done9: -// CHECK7-NEXT: [[TMP31:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[TMP3]] to i8* -// CHECK7-NEXT: [[TMP33:%.*]] = bitcast %struct.S.0* [[TMP31]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i32 4, i1 false) +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP40]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[TMP42:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK7-NEXT: [[TMP43:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP42]], i8* align 4 [[TMP43]], i32 4, i1 false) +// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP41]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK7: omp.arraycpy.done6: +// CHECK7-NEXT: [[TMP44:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP45:%.*]] = bitcast %struct.S.1* [[TMP9]] to i8* +// CHECK7-NEXT: [[TMP46:%.*]] = bitcast %struct.S.1* [[TMP44]] to i8* +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP45]], i8* align 4 [[TMP46]], i32 4, i1 false) // CHECK7-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK7: .omp.lastprivate.done: -// CHECK7-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN10]], i32 2 +// CHECK7-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK7-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN7]], i32 2 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP34]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK7-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK7: arraydestroy.done11: +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP47]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK7-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK7: arraydestroy.done8: // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK7-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK7-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2660,190 +2804,198 @@ // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK7-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK7-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK7-NEXT: [[_TMP6:%.*]] = alloca %struct.S.0*, align 4 +// CHECK7-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK7-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK7-NEXT: [[_TMP3:%.*]] = alloca %struct.S.1*, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 +// CHECK7-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP12:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP11]], align 4 +// CHECK7-NEXT: store %struct.S.1* [[TMP12]], %struct.S.1** [[TMP]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK7-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK7: arrayctor.loop: -// CHECK7-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK7-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i32 1 -// CHECK7-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK7-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK7-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i32 1 +// CHECK7-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK7-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK7: arrayctor.cont: -// CHECK7-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK7-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK7-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK7-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK7-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK7-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK7: omp.inner.for.cond.cleanup: // CHECK7-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK7-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !18 -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC3]], i32 0, i32 [[TMP15]] -// CHECK7-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 -// CHECK7-NEXT: [[TMP16:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4, !llvm.access.group !18 -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK7-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 [[TMP17]] -// CHECK7-NEXT: [[TMP18:%.*]] = bitcast %struct.S.0* [[ARRAYIDX8]] to i8* -// CHECK7-NEXT: [[TMP19:%.*]] = bitcast %struct.S.0* [[TMP16]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false), !llvm.access.group !18 +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP25]] +// CHECK7-NEXT: store i32 [[TMP24]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP26:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP3]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 [[TMP27]] +// CHECK7-NEXT: [[TMP28:%.*]] = bitcast %struct.S.1* [[ARRAYIDX5]] to i8* +// CHECK7-NEXT: [[TMP29:%.*]] = bitcast %struct.S.1* [[TMP26]] to i8* +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 4, i1 false), !llvm.access.group !18 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK7-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK7-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK7-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK7-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK7-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK7-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK7-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 2, i32* [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK7: .omp.final.done: -// CHECK7-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK7-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK7-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK7-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK7: .omp.lastprivate.then: -// CHECK7-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK7-NEXT: store i32 [[TMP27]], i32* [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK7-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 8, i1 false) -// CHECK7-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP1]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP30:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR4]] to %struct.S.0* -// CHECK7-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN10]], i32 2 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN10]], [[TMP31]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK7-NEXT: [[TMP37:%.*]] = load i32, i32* [[T_VAR2]], align 4 +// CHECK7-NEXT: store i32 [[TMP37]], i32* [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP38:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK7-NEXT: [[TMP39:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i32 8, i1 false) +// CHECK7-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP10]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP40:%.*]] = bitcast [2 x %struct.S.1]* [[S_ARR]] to %struct.S.1* +// CHECK7-NEXT: [[TMP41:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN7]], i32 2 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN7]], [[TMP41]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP30]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK7-NEXT: [[TMP33:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i32 4, i1 false) -// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP31]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK7: omp.arraycpy.done11: -// CHECK7-NEXT: [[TMP34:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP35:%.*]] = bitcast %struct.S.0* [[TMP5]] to i8* -// CHECK7-NEXT: [[TMP36:%.*]] = bitcast %struct.S.0* [[TMP34]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i32 4, i1 false) +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP40]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[TMP42:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK7-NEXT: [[TMP43:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP42]], i8* align 4 [[TMP43]], i32 4, i1 false) +// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP41]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK7: omp.arraycpy.done8: +// CHECK7-NEXT: [[TMP44:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP45:%.*]] = bitcast %struct.S.1* [[TMP15]] to i8* +// CHECK7-NEXT: [[TMP46:%.*]] = bitcast %struct.S.1* [[TMP44]] to i8* +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP45]], i8* align 4 [[TMP46]], i32 4, i1 false) // CHECK7-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK7: .omp.lastprivate.done: -// CHECK7-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN12]], i32 2 +// CHECK7-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK7-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN9]], i32 2 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP37]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK7-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK7: arraydestroy.done13: +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP47]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK7-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK7: arraydestroy.done10: // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK7-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK7-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK7-NEXT: entry: -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK7-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK7-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK7-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK7-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK7-NEXT: entry: -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK7-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK7-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK7-NEXT: store i32 0, i32* [[F]], align 4 // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK7-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK7-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK7-NEXT: entry: -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK7-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK7-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK7-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK7-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK7-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK7-NEXT: entry: -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK7-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK7-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK7-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp @@ -314,29 +314,36 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 @@ -352,62 +359,70 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !5 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i64 2 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP22]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -417,12 +432,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -437,14 +453,20 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 @@ -460,77 +482,77 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM3]] -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 4, i1 false), !llvm.access.group !9 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* +// CHECK1-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false), !llvm.access.group !9 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD5]], i32* [[SIVAR]], align 4, !llvm.access.group !9 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP22]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP27]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] @@ -543,23 +565,23 @@ // CHECK1-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) -// CHECK1-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 8 -// CHECK1-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) +// CHECK1-NEXT: store %struct.S.1* [[TEST]], %struct.S.1** [[VAR]], align 8 +// CHECK1-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 8 // CHECK1-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 2) // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0) // CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 @@ -569,306 +591,328 @@ // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done2: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK1-NEXT: ret i32 [[TMP4]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) +// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80 // CHECK1-SAME: () #[[ATTR4]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[_TMP2:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: -// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !14 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !14 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN4]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN4]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP22]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done5: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[_TMP3:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: -// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 8, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[ARRAYIDX6]] to i8* -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i64 4, i1 false), !llvm.access.group !17 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP17:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP3]], align 8, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: [[TMP19:%.*]] = bitcast %struct.S.1* [[ARRAYIDX6]] to i8* +// CHECK1-NEXT: [[TMP20:%.*]] = bitcast %struct.S.1* [[TMP17]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i64 4, i1 false), !llvm.access.group !17 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK1-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN8]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN8]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP21]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP26]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done9: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load volatile i32, i32* @g, align 4 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load volatile i32, i32* @g, align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -877,11 +921,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -1035,29 +1079,36 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124 // CHECK3-SAME: () #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 @@ -1073,60 +1124,68 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group !6 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[TMP10]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i32 2 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP15]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -1136,12 +1195,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1156,14 +1216,20 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 @@ -1177,75 +1243,75 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP12]] -// CHECK3-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[ARRAYIDX2]] to i8* -// CHECK3-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 4, i1 false), !llvm.access.group !10 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP17]] +// CHECK3-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX2]] to i8* +// CHECK3-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false), !llvm.access.group !10 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] // CHECK3-NEXT: store i32 [[ADD3]], i32* [[SIVAR]], align 4, !llvm.access.group !10 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN5]], i32 2 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN5]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP22]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP27]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] @@ -1258,23 +1324,23 @@ // CHECK3-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK3-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i32 8, i1 false) -// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i32 1 -// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) -// CHECK3-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 4 -// CHECK3-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 4 +// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i32 1 +// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK3-NEXT: store %struct.S.1* [[TEST]], %struct.S.1** [[VAR]], align 4 +// CHECK3-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 4 // CHECK3-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 2) // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0) // CHECK3-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 @@ -1284,300 +1350,322 @@ // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK3: arraydestroy.done2: -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK3-NEXT: ret i32 [[TMP4]] // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK3-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80 // CHECK3-SAME: () #[[ATTR4]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[_TMP2:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 4 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK3: arrayctor.loop: -// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i32 1 -// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i32 1 +// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: -// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK3-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group !15 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[TMP10]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !15 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN4]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN4]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP15]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK3: arraydestroy.done5: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[_TMP2:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 4 +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK3: arrayctor.loop: -// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i32 1 -// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i32 1 +// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: -// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK3-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[ARRAYIDX4]] to i8* -// CHECK3-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i32 4, i1 false), !llvm.access.group !18 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP17:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP2]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 [[TMP18]] +// CHECK3-NEXT: [[TMP19:%.*]] = bitcast %struct.S.1* [[ARRAYIDX4]] to i8* +// CHECK3-NEXT: [[TMP20:%.*]] = bitcast %struct.S.1* [[TMP17]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i32 4, i1 false), !llvm.access.group !18 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK3-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN6]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP21]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP26]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK3: arraydestroy.done7: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load volatile i32, i32* @g, align 4 // CHECK3-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load volatile i32, i32* @g, align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1586,11 +1674,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -1736,15 +1824,17 @@ // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104 // CHECK5-SAME: () #[[ATTR5:[0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 @@ -1752,61 +1842,74 @@ // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[G:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[G1:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32* undef, i32** [[_TMP1]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK5-NEXT: store i32* [[G1]], i32** [[_TMP2]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 -// CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !4 +// CHECK5-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !4 +// CHECK5-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !4 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK5-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 2, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1815,12 +1918,13 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR5]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 @@ -1833,77 +1937,83 @@ // CHECK5-NEXT: [[_TMP3:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK5-NEXT: store i32* undef, i32** [[_TMP1]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK5-NEXT: store i32* [[G1]], i32** [[_TMP3]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 // CHECK5-NEXT: store i32 1, i32* [[G]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP3]], align 8, !llvm.access.group !8 -// CHECK5-NEXT: store volatile i32 1, i32* [[TMP10]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32*, i32** [[_TMP3]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: store volatile i32 1, i32* [[TMP15]], align 4, !llvm.access.group !8 // CHECK5-NEXT: store i32 2, i32* [[SIVAR]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK5-NEXT: store i32* [[G]], i32** [[TMP11]], align 8, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP3]], align 8, !llvm.access.group !8 -// CHECK5-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK5-NEXT: store i32* [[SIVAR]], i32** [[TMP14]], align 8, !llvm.access.group !8 -// CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group !8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK5-NEXT: store i32* [[G]], i32** [[TMP16]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32*, i32** [[_TMP3]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: store i32* [[TMP18]], i32** [[TMP17]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 +// CHECK5-NEXT: store i32* [[SIVAR]], i32** [[TMP19]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group !8 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK5-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 2, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2824,29 +2934,36 @@ // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124 // CHECK13-SAME: () #[[ATTR0:[0-9]+]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK13-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 @@ -2862,62 +2979,70 @@ // CHECK13-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK13: arrayctor.cont: // CHECK13-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !6 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !6 +// CHECK13-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !6 +// CHECK13-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !6 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !6 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]]) -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK13-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK13-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 2, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]] // CHECK13-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i64 2 +// CHECK13-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP22]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -2937,12 +3062,13 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2957,14 +3083,20 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 @@ -2980,77 +3112,77 @@ // CHECK13-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK13: arrayctor.cont: // CHECK13-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !10 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK13-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP17]] to i64 // CHECK13-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM3]] -// CHECK13-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* -// CHECK13-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 4, i1 false), !llvm.access.group !10 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !10 -// CHECK13-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] +// CHECK13-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* +// CHECK13-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false), !llvm.access.group !10 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] // CHECK13-NEXT: store i32 [[ADD5]], i32* [[SIVAR]], align 4, !llvm.access.group !10 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK13-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK13-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK13-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 2, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] // CHECK13-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i64 2 +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP22]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP27]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] @@ -3072,251 +3204,273 @@ // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80 // CHECK13-SAME: () #[[ATTR0]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 +// CHECK13-NEXT: [[_TMP1:%.*]] = alloca %struct.S.2*, align 8 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK13-NEXT: [[_TMP2:%.*]] = alloca %struct.S.0*, align 8 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK13-NEXT: [[_TMP2:%.*]] = alloca %struct.S.2*, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 8 +// CHECK13-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: store %struct.S.2* undef, %struct.S.2** [[_TMP1]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN]], i64 2 // CHECK13-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK13: arrayctor.loop: -// CHECK13-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK13-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] -// CHECK13-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK13-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK13-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.2* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK13-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] +// CHECK13-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYCTOR_CUR]], i64 1 +// CHECK13-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK13-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK13: arrayctor.cont: -// CHECK13-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK13-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK13-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK13-NEXT: store %struct.S.2* [[VAR]], %struct.S.2** [[_TMP2]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK13-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !15 +// CHECK13-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !15 +// CHECK13-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !15 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !15 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]]) -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK13-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK13-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 2, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: -// CHECK13-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN4]], i64 2 +// CHECK13-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN4]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK13-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP22]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK13-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK13: arraydestroy.done5: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK13-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK13-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK13-NEXT: entry: -// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK13-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK13-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK13-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK13-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK13-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK13-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 +// CHECK13-NEXT: [[_TMP1:%.*]] = alloca %struct.S.2*, align 8 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK13-NEXT: [[_TMP3:%.*]] = alloca %struct.S.0*, align 8 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK13-NEXT: [[_TMP3:%.*]] = alloca %struct.S.2*, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 8 +// CHECK13-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store %struct.S.2* undef, %struct.S.2** [[_TMP1]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN]], i64 2 // CHECK13-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK13: arrayctor.loop: -// CHECK13-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK13-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] -// CHECK13-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK13-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK13-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.2* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK13-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] +// CHECK13-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYCTOR_CUR]], i64 1 +// CHECK13-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK13-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK13: arrayctor.cont: -// CHECK13-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK13-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP3]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK13-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK13-NEXT: store %struct.S.2* [[VAR]], %struct.S.2** [[_TMP3]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !18 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 -// CHECK13-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 8, !llvm.access.group !18 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK13-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK13-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] -// CHECK13-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[ARRAYIDX6]] to i8* -// CHECK13-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i64 4, i1 false), !llvm.access.group !18 +// CHECK13-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP17:%.*]] = load %struct.S.2*, %struct.S.2** [[_TMP3]], align 8, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK13-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK13-NEXT: [[TMP19:%.*]] = bitcast %struct.S.2* [[ARRAYIDX6]] to i8* +// CHECK13-NEXT: [[TMP20:%.*]] = bitcast %struct.S.2* [[TMP17]] to i8* +// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i64 4, i1 false), !llvm.access.group !18 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK13-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK13-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 2, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: -// CHECK13-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN8]], i64 2 +// CHECK13-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN8]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP21]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK13-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP26]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK13-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] // CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK13: arraydestroy.done9: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK13-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK13-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK13-NEXT: entry: -// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK13-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK13-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK13-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR5]] +// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK13-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK13-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK13-NEXT: call void @_ZN1SIiED2Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR5]] // CHECK13-NEXT: ret void // // @@ -3343,52 +3497,59 @@ // // // CHECK13-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK13-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK13-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK13-NEXT: entry: -// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK13-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK13-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK13-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK13-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK13-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK13-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], %struct.S.2* [[THIS1]], i32 0, i32 0 // CHECK13-NEXT: [[TMP0:%.*]] = load volatile i32, i32* @g, align 4 // CHECK13-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK13-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK13-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK13-NEXT: entry: -// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK13-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK13-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK13-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK13-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 // CHECK13-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124 // CHECK15-SAME: () #[[ATTR0:[0-9]+]] { // CHECK15-NEXT: entry: -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK15-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 @@ -3404,60 +3565,68 @@ // CHECK15-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK15: arrayctor.cont: // CHECK15-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK15-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group !7 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 [[TMP11]], i32* [[TMP10]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !7 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK15-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK15-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 2, i32* [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK15: .omp.final.done: // CHECK15-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]] // CHECK15-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i32 2 +// CHECK15-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP15]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -3477,12 +3646,13 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3497,14 +3667,20 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 @@ -3518,75 +3694,75 @@ // CHECK15-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK15: arrayctor.cont: // CHECK15-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK15-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !11 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP11]] -// CHECK15-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK15-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP12]] -// CHECK15-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[ARRAYIDX2]] to i8* -// CHECK15-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 4, i1 false), !llvm.access.group !11 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !11 -// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP16]] +// CHECK15-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP17]] +// CHECK15-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX2]] to i8* +// CHECK15-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false), !llvm.access.group !11 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] // CHECK15-NEXT: store i32 [[ADD3]], i32* [[SIVAR]], align 4, !llvm.access.group !11 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK15-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK15-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK15-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 2, i32* [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK15: .omp.final.done: // CHECK15-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] // CHECK15-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN5]], i32 2 +// CHECK15-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN5]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP22]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP27]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] @@ -3608,245 +3784,267 @@ // CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80 // CHECK15-SAME: () #[[ATTR0]] { // CHECK15-NEXT: entry: -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 +// CHECK15-NEXT: [[_TMP1:%.*]] = alloca %struct.S.2*, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK15-NEXT: [[_TMP2:%.*]] = alloca %struct.S.0*, align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK15-NEXT: [[_TMP2:%.*]] = alloca %struct.S.2*, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 4 +// CHECK15-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: store %struct.S.2* undef, %struct.S.2** [[_TMP1]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN]], i32 2 // CHECK15-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK15: arrayctor.loop: -// CHECK15-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK15-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] -// CHECK15-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i32 1 -// CHECK15-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK15-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.2* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK15-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] +// CHECK15-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYCTOR_CUR]], i32 1 +// CHECK15-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK15-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK15: arrayctor.cont: -// CHECK15-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK15-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK15-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK15-NEXT: store %struct.S.2* [[VAR]], %struct.S.2** [[_TMP2]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 -// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !16 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group !16 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: store i32 [[TMP11]], i32* [[TMP10]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !16 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !16 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK15-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK15-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 2, i32* [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK15: .omp.final.done: -// CHECK15-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN4]], i32 2 +// CHECK15-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN4]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP15]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK15-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK15-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK15: arraydestroy.done5: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK15-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK15-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK15-NEXT: entry: -// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK15-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK15-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK15-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 4 +// CHECK15-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 4 +// CHECK15-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 4 +// CHECK15-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 +// CHECK15-NEXT: [[_TMP1:%.*]] = alloca %struct.S.2*, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK15-NEXT: [[_TMP2:%.*]] = alloca %struct.S.0*, align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK15-NEXT: [[_TMP2:%.*]] = alloca %struct.S.2*, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 4 +// CHECK15-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store %struct.S.2* undef, %struct.S.2** [[_TMP1]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN]], i32 2 // CHECK15-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK15: arrayctor.loop: -// CHECK15-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK15-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] -// CHECK15-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i32 1 -// CHECK15-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK15-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.2* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK15-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] +// CHECK15-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYCTOR_CUR]], i32 1 +// CHECK15-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK15-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK15: arrayctor.cont: -// CHECK15-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK15-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK15-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK15-NEXT: store %struct.S.2* [[VAR]], %struct.S.2** [[_TMP2]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 -// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK15-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !19 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP11]] -// CHECK15-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 -// CHECK15-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4, !llvm.access.group !19 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 -// CHECK15-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP13]] -// CHECK15-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[ARRAYIDX4]] to i8* -// CHECK15-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i32 4, i1 false), !llvm.access.group !19 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP16]] +// CHECK15-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP17:%.*]] = load %struct.S.2*, %struct.S.2** [[_TMP2]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 [[TMP18]] +// CHECK15-NEXT: [[TMP19:%.*]] = bitcast %struct.S.2* [[ARRAYIDX4]] to i8* +// CHECK15-NEXT: [[TMP20:%.*]] = bitcast %struct.S.2* [[TMP17]] to i8* +// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i32 4, i1 false), !llvm.access.group !19 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK15-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK15-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK15-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 2, i32* [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK15: .omp.final.done: -// CHECK15-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN6]], i32 2 +// CHECK15-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN6]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP21]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK15-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP26]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK15-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] // CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK15: arraydestroy.done7: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK15-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK15-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK15-NEXT: entry: -// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK15-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK15-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK15-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR5]] +// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 4 +// CHECK15-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 4 +// CHECK15-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 4 +// CHECK15-NEXT: call void @_ZN1SIiED2Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR5]] // CHECK15-NEXT: ret void // // @@ -3873,38 +4071,40 @@ // // // CHECK15-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK15-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK15-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK15-NEXT: entry: -// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK15-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK15-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK15-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 4 +// CHECK15-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 4 +// CHECK15-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 4 +// CHECK15-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], %struct.S.2* [[THIS1]], i32 0, i32 0 // CHECK15-NEXT: [[TMP0:%.*]] = load volatile i32, i32* @g, align 4 // CHECK15-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK15-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK15-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK15-NEXT: entry: -// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK15-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK15-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 4 +// CHECK15-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 4 +// CHECK15-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 4 // CHECK15-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104 // CHECK17-SAME: () #[[ATTR0:[0-9]+]] { // CHECK17-NEXT: entry: -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 @@ -3912,61 +4112,74 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[G:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[G1:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: store i32* undef, i32** [[_TMP1]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK17-NEXT: store i32* [[G1]], i32** [[_TMP2]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK17-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK17-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK17-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 -// CHECK17-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK17-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !5 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK17-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK17-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !5 +// CHECK17-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !5 +// CHECK17-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !5 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !5 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK17-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK17-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 2, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3975,12 +4188,13 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 @@ -3996,74 +4210,80 @@ // CHECK17-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK17-NEXT: store i32* undef, i32** [[_TMP1]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK17-NEXT: store i32* [[G1]], i32** [[_TMP3]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 // CHECK17-NEXT: store i32 1, i32* [[G]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP3]], align 8, !llvm.access.group !9 -// CHECK17-NEXT: store volatile i32 1, i32* [[TMP10]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32*, i32** [[_TMP3]], align 8, !llvm.access.group !9 +// CHECK17-NEXT: store volatile i32 1, i32* [[TMP15]], align 4, !llvm.access.group !9 // CHECK17-NEXT: store i32 2, i32* [[SIVAR]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 0 -// CHECK17-NEXT: store i32* [[G]], i32** [[TMP11]], align 8, !llvm.access.group !9 -// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP3]], align 8, !llvm.access.group !9 -// CHECK17-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8, !llvm.access.group !9 -// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 2 -// CHECK17-NEXT: store i32* [[SIVAR]], i32** [[TMP14]], align 8, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 0 +// CHECK17-NEXT: store i32* [[G]], i32** [[TMP16]], align 8, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32*, i32** [[_TMP3]], align 8, !llvm.access.group !9 +// CHECK17-NEXT: store i32* [[TMP18]], i32** [[TMP17]], align 8, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 2 +// CHECK17-NEXT: store i32* [[SIVAR]], i32** [[TMP19]], align 8, !llvm.access.group !9 // CHECK17-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR3:[0-9]+]], !llvm.access.group !9 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK17-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK17-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK17-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 2, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp @@ -78,71 +78,86 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l36 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 4), !llvm.access.group !6 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !6 +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 4), !llvm.access.group !6 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !6 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !6 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !6 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !6 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -151,12 +166,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -166,60 +182,66 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -230,71 +252,86 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l38 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 3), !llvm.access.group !15 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 3), !llvm.access.group !15 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -303,12 +340,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -318,60 +356,66 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -397,71 +441,86 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l30 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group !21 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !21 +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 2), !llvm.access.group !21 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[TMP12]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !21 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -470,12 +529,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -485,60 +545,66 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp @@ -125,209 +125,233 @@ // CHECK1-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[TMP0]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[SIVAR1]], align 4 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i32* [[SIVAR1]]), !llvm.access.group !5 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[SIVAR]], i32** [[TMP18]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i32* [[SIVAR1]] to i8* -// CHECK1-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP18]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP24:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP25]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP26]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP22]] monotonic, align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP29]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SIVAR2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[SIVAR2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 0, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[SIVAR2]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[SIVAR2]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[SIVAR]], align 4, !llvm.access.group !9 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i32* [[SIVAR2]] to i8* -// CHECK1-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP18]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, i8* [[TMP24]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR2]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: store i32 [[ADD6]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[SIVAR2]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP22]] monotonic, align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = atomicrmw add i32* [[TMP6]], i32 [[TMP28]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -418,209 +442,233 @@ // CHECK1-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[TMP0]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[T_VAR1]], align 4 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i32* [[T_VAR1]]), !llvm.access.group !14 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[TMP18]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !14 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i32* [[T_VAR1]] to i8* -// CHECK1-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP18]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP24:%.*]] = bitcast i32* [[T_VAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP25]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP26]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP22]] monotonic, align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP29]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[T_VAR2]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[T_VAR]], align 4, !llvm.access.group !17 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i32* [[T_VAR2]] to i8* -// CHECK1-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP18]], void (i8*, i8*)* @.omp.reduction.reduction_func.5, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i32* [[T_VAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, i8* [[TMP24]], void (i8*, i8*)* @.omp.reduction.reduction_func.5, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: store i32 [[ADD6]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP22]] monotonic, align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = atomicrmw add i32* [[TMP6]], i32 [[TMP28]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -716,205 +764,229 @@ // CHECK3-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[TMP0]], i32** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[SIVAR1]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], i32* [[SIVAR1]]), !llvm.access.group !6 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[TMP16]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = bitcast i32* [[SIVAR1]] to i8* -// CHECK3-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP22:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK3-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i32 4, i8* [[TMP23]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP27]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: store i32 0, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[SIVAR1]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[SIVAR1]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[SIVAR]], align 4, !llvm.access.group !10 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP17:%.*]] = bitcast i32* [[SIVAR1]] to i8* -// CHECK3-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, i8* [[TMP18]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP23:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], i32 1, i32 4, i8* [[TMP24]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[TMP6]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP22]] monotonic, align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = atomicrmw add i32* [[TMP6]], i32 [[TMP28]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -1005,205 +1077,229 @@ // CHECK3-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[TMP0]], i32** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[T_VAR1]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], i32* [[T_VAR1]]), !llvm.access.group !15 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[T_VAR]], i32** [[TMP16]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !15 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = bitcast i32* [[T_VAR1]] to i8* -// CHECK3-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 4, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP22:%.*]] = bitcast i32* [[T_VAR]] to i8* +// CHECK3-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, i8* [[TMP23]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP27]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: store i32 0, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[T_VAR1]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[T_VAR1]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[T_VAR]], align 4, !llvm.access.group !18 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP17:%.*]] = bitcast i32* [[T_VAR1]] to i8* -// CHECK3-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, i8* [[TMP18]], void (i8*, i8*)* @.omp.reduction.reduction_func.5, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP23:%.*]] = bitcast i32* [[T_VAR]] to i8* +// CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], i32 1, i32 4, i8* [[TMP24]], void (i8*, i8*)* @.omp.reduction.reduction_func.5, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[TMP6]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP22]] monotonic, align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = atomicrmw add i32* [[TMP6]], i32 [[TMP28]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -1278,213 +1374,237 @@ // CHECK5-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK5-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i32* [[TMP0]], i32** [[TMP1]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: store i32 0, i32* [[SIVAR1]], align 4 +// CHECK5-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK5-NEXT: store i32 0, i32* [[SIVAR]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i32* [[SIVAR1]]), !llvm.access.group !4 +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !4 +// CHECK5-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !4 +// CHECK5-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store i32* [[SIVAR]], i32** [[TMP18]], align 8, !llvm.access.group !4 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 2, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: -// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK5-NEXT: [[TMP17:%.*]] = bitcast i32* [[SIVAR1]] to i8* -// CHECK5-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8 -// CHECK5-NEXT: [[TMP18:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK5-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP18]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK5-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP24:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK5-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 +// CHECK5-NEXT: [[TMP25:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK5-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP25]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP26]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK5-NEXT: store i32 [[ADD3]], i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK5-NEXT: store i32 [[ADD2]], i32* [[TMP2]], align 4 +// CHECK5-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK5-NEXT: [[TMP23:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP22]] monotonic, align 4 +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK5-NEXT: [[TMP30:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP29]] monotonic, align 4 // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[SIVAR2:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: store i32 0, i32* [[SIVAR2]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK5-NEXT: store i32 0, i32* [[SIVAR]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[SIVAR2]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK5-NEXT: store i32 [[ADD4]], i32* [[SIVAR2]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK5-NEXT: store i32* [[SIVAR2]], i32** [[TMP13]], align 8, !llvm.access.group !8 -// CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]), !llvm.access.group !8 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK5-NEXT: store i32 [[ADD3]], i32* [[SIVAR]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK5-NEXT: store i32* [[SIVAR]], i32** [[TMP19]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]), !llvm.access.group !8 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK5-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK5-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK5-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 2, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: -// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK5-NEXT: [[TMP18:%.*]] = bitcast i32* [[SIVAR2]] to i8* -// CHECK5-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 8 -// CHECK5-NEXT: [[TMP19:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK5-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP19]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK5-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP24:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK5-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 +// CHECK5-NEXT: [[TMP25:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK5-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, i8* [[TMP25]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP26]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[SIVAR2]], align 4 -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK5-NEXT: store i32 [[ADD6]], i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK5-NEXT: store i32 [[ADD5]], i32* [[TMP6]], align 4 +// CHECK5-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR2]], align 4 -// CHECK5-NEXT: [[TMP24:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP23]] monotonic, align 4 +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK5-NEXT: [[TMP30:%.*]] = atomicrmw add i32* [[TMP6]], i32 [[TMP29]] monotonic, align 4 // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: // CHECK5-NEXT: ret void diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp @@ -333,75 +333,93 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !8 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP18]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -410,13 +428,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -426,67 +444,73 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -498,75 +522,93 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !17 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !17 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !17 +// CHECK1-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP18]], align 8, !llvm.access.group !17 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !17 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -575,13 +617,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -591,67 +633,73 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !20 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !20 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -663,75 +711,93 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !23 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !23 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !23 +// CHECK1-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP18]], align 8, !llvm.access.group !23 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !23 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -740,13 +806,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -756,88 +822,94 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[CONV2]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP13]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -849,75 +921,93 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !29 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !29 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !29 +// CHECK1-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP18]], align 8, !llvm.access.group !29 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !29 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -926,13 +1016,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -942,55 +1032,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -998,9 +1094,9 @@ // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1012,75 +1108,93 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !35 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP18]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !35 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1089,13 +1203,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1105,55 +1219,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !38 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !38 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !38 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -1161,9 +1281,9 @@ // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1317,73 +1437,91 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !9 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1392,13 +1530,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1408,64 +1546,70 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1477,73 +1621,91 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !18 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !18 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1552,13 +1714,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1568,64 +1730,70 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1637,73 +1805,91 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !24 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !24 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1712,13 +1898,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1728,83 +1914,89 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP7]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !27 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP15]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !27 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP21]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK3-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1816,73 +2008,91 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !30 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !30 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1891,13 +2101,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1907,52 +2117,58 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !33 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP18]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK3-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK3: omp.inner.for.end: @@ -1960,9 +2176,9 @@ // CHECK3: omp.dispatch.inc: // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1974,73 +2190,91 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !36 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !36 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2049,13 +2283,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2065,52 +2299,58 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !39 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP18]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !39 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK3-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK3: omp.inner.for.end: @@ -2118,9 +2358,9 @@ // CHECK3: omp.dispatch.inc: // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2274,75 +2514,93 @@ // CHECK5-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !8 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !8 +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP18]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !8 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2351,13 +2609,13 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2367,67 +2625,73 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK5-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2439,75 +2703,93 @@ // CHECK5-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !17 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !17 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !17 +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !17 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !17 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !17 +// CHECK5-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !17 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !17 +// CHECK5-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !17 +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP18]], align 8, !llvm.access.group !17 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !17 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !17 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !17 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2516,13 +2798,13 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2532,67 +2814,73 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !20 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !20 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK5-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2604,75 +2892,93 @@ // CHECK5-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !23 +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !23 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !23 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !23 +// CHECK5-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !23 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !23 +// CHECK5-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !23 +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP18]], align 8, !llvm.access.group !23 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !23 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2681,13 +2987,13 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2697,88 +3003,94 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[CONV2]] // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP13]] to i32 // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !26 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !26 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !26 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK5-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK5-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2790,75 +3102,93 @@ // CHECK5-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !29 +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !29 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !29 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !29 +// CHECK5-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !29 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !29 +// CHECK5-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !29 +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP18]], align 8, !llvm.access.group !29 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !29 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2867,13 +3197,13 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2883,55 +3213,61 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK5-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !32 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !32 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !32 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -2939,9 +3275,9 @@ // CHECK5: omp.dispatch.inc: // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK5-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2953,75 +3289,93 @@ // CHECK5-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !35 +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !35 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !35 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !35 +// CHECK5-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !35 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !35 +// CHECK5-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !35 +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP18]], align 8, !llvm.access.group !35 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !35 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3030,13 +3384,13 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3046,55 +3400,61 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK5-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !38 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !38 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !38 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !38 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -3102,9 +3462,9 @@ // CHECK5: omp.dispatch.inc: // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK5-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3258,73 +3618,91 @@ // CHECK7-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !9 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !9 // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK7-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK7-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 123, i32* [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3333,13 +3711,13 @@ // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3349,64 +3727,70 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP17]] // CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK7-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK7-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK7-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 123, i32* [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3418,73 +3802,91 @@ // CHECK7-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !18 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !18 // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK7-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK7-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 123, i32* [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3493,13 +3895,13 @@ // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3509,64 +3911,70 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP17]] // CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK7-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK7-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK7-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 123, i32* [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3578,73 +3986,91 @@ // CHECK7-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !24 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK7-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !24 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK7-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !24 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !24 +// CHECK7-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4, !llvm.access.group !24 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !24 +// CHECK7-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4, !llvm.access.group !24 +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 4, !llvm.access.group !24 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !24 // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK7-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK7-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 123, i32* [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3653,13 +4079,13 @@ // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3669,83 +4095,89 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK7-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ [[TMP7]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 -// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK7-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !27 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP15]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !27 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP21]] // CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK7-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK7-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK7-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK7-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK7: omp.dispatch.end: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK7-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK7-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK7-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 123, i32* [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3757,73 +4189,91 @@ // CHECK7-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !30 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 +// CHECK7-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !30 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK7-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !30 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !30 +// CHECK7-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4, !llvm.access.group !30 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !30 +// CHECK7-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4, !llvm.access.group !30 +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 4, !llvm.access.group !30 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !30 // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK7-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK7-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 123, i32* [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3832,13 +4282,13 @@ // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3848,52 +4298,58 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK7-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK7-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !33 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !33 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP18]] // CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK7: omp.inner.for.end: @@ -3901,9 +4357,9 @@ // CHECK7: omp.dispatch.inc: // CHECK7-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK7: omp.dispatch.end: -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK7-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK7-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 123, i32* [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3915,73 +4371,91 @@ // CHECK7-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !36 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !36 // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK7-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK7-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 123, i32* [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3990,13 +4464,13 @@ // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4006,52 +4480,58 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK7-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK7-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !39 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !39 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP18]] // CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !39 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK7: omp.inner.for.end: @@ -4059,9 +4539,9 @@ // CHECK7: omp.dispatch.inc: // CHECK7-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK7: omp.dispatch.end: -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK7-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK7-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 123, i32* [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4839,29 +5319,31 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4871,90 +5353,106 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK13-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 -// CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 -// CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !13 -// CHECK13-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !13 -// CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !13 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !13 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !13 +// CHECK13-NEXT: store i64 [[TMP25]], i64* [[TMP24]], align 8, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !13 +// CHECK13-NEXT: store i64 [[TMP27]], i64* [[TMP26]], align 8, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 8, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[TMP30]], align 8, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store i32* [[TMP6]], i32** [[TMP31]], align 8, !llvm.access.group !13 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !13 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK13-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK13-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 -// CHECK13-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD9]], i32* [[I3]], align 4 +// CHECK13-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK13-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[I3]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: br label [[OMP_PRECOND_END]] @@ -4963,15 +5461,14 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4981,97 +5478,104 @@ // CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK13-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 +// CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK13-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] +// CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 -// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !17 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !17 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !17 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !17 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK13-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK13-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK13-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 -// CHECK13-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 -// CHECK13-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] -// CHECK13-NEXT: store i32 [[ADD12]], i32* [[I5]], align 4 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK13-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 +// CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 +// CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] +// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I4]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: br label [[OMP_PRECOND_END]] @@ -5085,29 +5589,31 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5117,90 +5623,106 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK13-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 -// CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 -// CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !22 -// CHECK13-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !22 -// CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !22 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !22 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !22 +// CHECK13-NEXT: store i64 [[TMP25]], i64* [[TMP24]], align 8, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !22 +// CHECK13-NEXT: store i64 [[TMP27]], i64* [[TMP26]], align 8, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 8, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[TMP30]], align 8, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store i32* [[TMP6]], i32** [[TMP31]], align 8, !llvm.access.group !22 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !22 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK13-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK13-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 -// CHECK13-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD9]], i32* [[I3]], align 4 +// CHECK13-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK13-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[I3]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: br label [[OMP_PRECOND_END]] @@ -5209,15 +5731,14 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5227,97 +5748,104 @@ // CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK13-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 +// CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK13-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] +// CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 -// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !25 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !25 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK13-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK13-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK13-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 -// CHECK13-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 -// CHECK13-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] -// CHECK13-NEXT: store i32 [[ADD12]], i32* [[I5]], align 4 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK13-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 +// CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 +// CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] +// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I4]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: br label [[OMP_PRECOND_END]] @@ -5332,8 +5860,7 @@ // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 @@ -5342,152 +5869,168 @@ // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK13-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK13-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK13-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK13-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK13-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK13-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] +// CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK13-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] -// CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] +// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !28 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]]), !llvm.access.group !28 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK13-NEXT: store i64 [[TMP24]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK13-NEXT: store i64 [[TMP26]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !28 +// CHECK13-NEXT: store i64 [[TMP28]], i64* [[TMP27]], align 8, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !28 +// CHECK13-NEXT: store i64 [[TMP30]], i64* [[TMP29]], align 8, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: store i32 [[TMP32]], i32* [[TMP31]], align 8, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[TMP33]], align 8, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: store i32* [[TMP6]], i32** [[TMP34]], align 8, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: store i32 [[TMP36]], i32* [[TMP35]], align 8, !llvm.access.group !28 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !28 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK13-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK13-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK13-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] -// CHECK13-NEXT: br i1 [[CMP13]], label [[COND_TRUE14:%.*]], label [[COND_FALSE15:%.*]] -// CHECK13: cond.true14: -// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: br label [[COND_END16:%.*]] -// CHECK13: cond.false15: -// CHECK13-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: br label [[COND_END16]] -// CHECK13: cond.end16: -// CHECK13-NEXT: [[COND17:%.*]] = phi i32 [ [[TMP32]], [[COND_TRUE14]] ], [ [[TMP33]], [[COND_FALSE15]] ] -// CHECK13-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: store i32 [[TMP34]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] +// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP43]], [[TMP44]] +// CHECK13-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] +// CHECK13: cond.true11: +// CHECK13-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: br label [[COND_END13:%.*]] +// CHECK13: cond.false12: +// CHECK13-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: br label [[COND_END13]] +// CHECK13: cond.end13: +// CHECK13-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP45]], [[COND_TRUE11]] ], [ [[TMP46]], [[COND_FALSE12]] ] +// CHECK13-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: store i32 [[TMP47]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) -// CHECK13-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 -// CHECK13-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP48:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP49:%.*]] = load i32, i32* [[TMP48]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP49]]) +// CHECK13-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0 +// CHECK13-NEXT: br i1 [[TMP51]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[SUB18:%.*]] = sub nsw i32 [[TMP39]], 0 -// CHECK13-NEXT: [[DIV19:%.*]] = sdiv i32 [[SUB18]], 1 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV19]], 1 -// CHECK13-NEXT: [[ADD20:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD20]], i32* [[I5]], align 4 +// CHECK13-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP52]], 0 +// CHECK13-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 +// CHECK13-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] +// CHECK13-NEXT: store i32 [[ADD17]], i32* [[I4]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: br label [[OMP_PRECOND_END]] @@ -5496,118 +6039,125 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[I7:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK13-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 6 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 8 +// CHECK13-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK13-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK13-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK13-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK13-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 +// CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK13-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK13-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] +// CHECK13-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 -// CHECK13-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK13-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 +// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I7]], align 4, !llvm.access.group !31 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[I7]], align 4, !llvm.access.group !31 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !31 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !31 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !31 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK13-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK13-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK13-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK13-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK13-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1 -// CHECK13-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1 -// CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] -// CHECK13-NEXT: store i32 [[ADD14]], i32* [[I7]], align 4 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP35]], 0 +// CHECK13-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 +// CHECK13-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 +// CHECK13-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK13-NEXT: store i32 [[ADD12]], i32* [[I5]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: br label [[OMP_PRECOND_END]] @@ -5621,29 +6171,31 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5653,90 +6205,106 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK13-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 -// CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 -// CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !34 -// CHECK13-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !34 -// CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !34 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !34 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !34 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !34 +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !34 +// CHECK13-NEXT: store i64 [[TMP25]], i64* [[TMP24]], align 8, !llvm.access.group !34 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !34 +// CHECK13-NEXT: store i64 [[TMP27]], i64* [[TMP26]], align 8, !llvm.access.group !34 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !34 +// CHECK13-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 8, !llvm.access.group !34 +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[TMP30]], align 8, !llvm.access.group !34 +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store i32* [[TMP6]], i32** [[TMP31]], align 8, !llvm.access.group !34 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !34 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK13-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK13-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 -// CHECK13-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD9]], i32* [[I3]], align 4 +// CHECK13-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK13-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[I3]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: br label [[OMP_PRECOND_END]] @@ -5745,15 +6313,14 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5763,93 +6330,100 @@ // CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK13-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 +// CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP11]], i32 35, i32 [[TMP8]], i32 [[TMP9]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP20]], i32 35, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP22]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !37 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !37 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !37 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !37 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !37 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK13-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK13-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK13-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP23]], 0 -// CHECK13-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 -// CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 -// CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I5]], align 4 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK13-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 +// CHECK13-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 +// CHECK13-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] +// CHECK13-NEXT: store i32 [[ADD10]], i32* [[I4]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: br label [[OMP_PRECOND_END]] @@ -5864,8 +6438,7 @@ // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 @@ -5874,127 +6447,143 @@ // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK13-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK13-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK13-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK13-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK13-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] -// CHECK13-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +// CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 -// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !40 -// CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 -// CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !40 -// CHECK13-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV8]], align 4, !llvm.access.group !40 -// CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !40 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !40 -// CHECK13-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP21]], i32* [[CONV9]], align 4, !llvm.access.group !40 -// CHECK13-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP22]]), !llvm.access.group !40 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK13-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !40 +// CHECK13-NEXT: store i64 [[TMP27]], i64* [[TMP26]], align 8, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !40 +// CHECK13-NEXT: store i64 [[TMP29]], i64* [[TMP28]], align 8, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: store i32 [[TMP31]], i32* [[TMP30]], align 8, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[TMP32]], align 8, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: store i32* [[TMP6]], i32** [[TMP33]], align 8, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: store i32 [[TMP35]], i32* [[TMP34]], align 8, !llvm.access.group !40 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !40 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !40 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK13-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) +// CHECK13-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK13-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP29]], 0 -// CHECK13-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV11]], 1 -// CHECK13-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD12]], i32* [[I5]], align 4 +// CHECK13-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP42]], 0 +// CHECK13-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] +// CHECK13-NEXT: store i32 [[ADD9]], i32* [[I4]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: br label [[OMP_PRECOND_END]] @@ -6003,115 +6592,122 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[I7:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK13-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 6 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 8 +// CHECK13-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK13-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK13-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK13-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK13-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 +// CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 [[TMP8]]) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP23]], i32 35, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK13-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP25]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 -// CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] +// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I7]], align 4, !llvm.access.group !43 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[I7]], align 4, !llvm.access.group !43 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !43 +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !43 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !43 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK13-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK13-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK13-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 -// CHECK13-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 -// CHECK13-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK13-NEXT: store i32 [[ADD13]], i32* [[I7]], align 4 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP35]], 0 +// CHECK13-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 +// CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 +// CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] +// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I5]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: br label [[OMP_PRECOND_END]] @@ -6276,75 +6872,93 @@ // CHECK13-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.12*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !46 +// CHECK13-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !46 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !46 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !46 +// CHECK13-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !46 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !46 +// CHECK13-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !46 +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 8, !llvm.access.group !46 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.13*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !46 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK13-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK13-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6353,13 +6967,13 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6369,66 +6983,72 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !49 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !49 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !49 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK13-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK13-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6440,75 +7060,93 @@ // CHECK13-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !52 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !52 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !52 +// CHECK13-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !52 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !52 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !52 +// CHECK13-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !52 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !52 +// CHECK13-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !52 +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 8, !llvm.access.group !52 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.15*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !52 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !52 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !52 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK13-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK13-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6517,13 +7155,13 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.15* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.15*, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6533,66 +7171,72 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.15* [[__CONTEXT]], %struct.anon.15** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.15*, %struct.anon.15** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], %struct.anon.15* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !55 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !55 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !55 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !55 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !55 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !55 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK13-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK13-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6605,90 +7249,105 @@ // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 8 // CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.16*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.16* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.16*, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK13-NEXT: store %struct.anon.16* [[__CONTEXT]], %struct.anon.16** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], %struct.anon.16* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK13-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 -// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !58 -// CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58 -// CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !58 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !58 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 +// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK13-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !58 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !58 +// CHECK13-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !58 +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !58 +// CHECK13-NEXT: store i64 [[TMP19]], i64* [[TMP18]], align 8, !llvm.access.group !58 +// CHECK13-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP20]], align 8, !llvm.access.group !58 +// CHECK13-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !58 +// CHECK13-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 8, !llvm.access.group !58 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.18*)* @.omp_outlined..27 to void (i32*, i32*, ...)*), %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !58 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK13-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6697,14 +7356,14 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.18* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.18*, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6714,90 +7373,97 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK13-NEXT: store %struct.anon.18* [[__CONTEXT]], %struct.anon.18** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.18*, %struct.anon.18** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], %struct.anon.18* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK13-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[CONV2]] // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK13-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !61 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !61 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !61 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !61 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK13-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK13-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP13]]) +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK13-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6809,75 +7475,93 @@ // CHECK13-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 8 // CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.20*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.20* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.20*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.20* [[__CONTEXT]], %struct.anon.20** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.20*, %struct.anon.20** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], %struct.anon.20* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !64 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !64 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !64 +// CHECK13-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !64 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !64 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !64 +// CHECK13-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !64 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !64 +// CHECK13-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !64 +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 8, !llvm.access.group !64 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.21*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !64 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !64 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !64 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP65:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK13-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK13-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6886,13 +7570,13 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.21* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.21*, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6902,54 +7586,60 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.21* [[__CONTEXT]], %struct.anon.21** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.21*, %struct.anon.21** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], %struct.anon.21* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK13-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !67 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !67 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !67 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !67 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !67 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !67 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 -// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 +// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK13-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP68:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -6957,9 +7647,9 @@ // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK13-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK13-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6972,90 +7662,105 @@ // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 8 // CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.22*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.22* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.22*, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK13-NEXT: store %struct.anon.22* [[__CONTEXT]], %struct.anon.22** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.22*, %struct.anon.22** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], %struct.anon.22* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK13-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !70 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 -// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !70 -// CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70 -// CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !70 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !70 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !70 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 +// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK13-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !70 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !70 +// CHECK13-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !70 +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !70 +// CHECK13-NEXT: store i64 [[TMP19]], i64* [[TMP18]], align 8, !llvm.access.group !70 +// CHECK13-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP20]], align 8, !llvm.access.group !70 +// CHECK13-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !70 +// CHECK13-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 8, !llvm.access.group !70 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.24*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !70 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !70 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !70 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP71:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK13-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7064,14 +7769,14 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.24* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.24*, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7081,67 +7786,74 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK13-NEXT: store %struct.anon.24* [[__CONTEXT]], %struct.anon.24** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.24*, %struct.anon.24** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_24:%.*]], %struct.anon.24* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK13-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 35, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32 35, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK13-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !73 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !73 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !73 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !73 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !73 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !73 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 +// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK13-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP74:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK13-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK13-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7531,27 +8243,30 @@ // CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7561,82 +8276,100 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !14 -// CHECK15-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !14 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !14 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !14 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: store i32 [[TMP23]], i32* [[TMP22]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[TMP28]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store i32* [[TMP6]], i32** [[TMP29]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !14 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK15-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK15-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: -// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK15-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK15-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -7649,15 +8382,14 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7670,86 +8402,94 @@ // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !18 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !18 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !18 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP27]] // CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK15-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK15-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK15-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK15-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK15-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK15-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -7767,27 +8507,30 @@ // CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7797,82 +8540,100 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !23 -// CHECK15-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !23 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !23 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !23 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: store i32 [[TMP23]], i32* [[TMP22]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[TMP28]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store i32* [[TMP6]], i32** [[TMP29]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !23 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK15-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK15-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: -// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK15-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK15-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -7885,15 +8646,14 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7906,86 +8666,94 @@ // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !26 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !26 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !26 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP27]] // CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK15-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK15-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK15-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK15-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK15-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK15-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -8004,33 +8772,35 @@ // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -8040,112 +8810,131 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK15-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK15-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !29 -// CHECK15-NEXT: store i32 [[TMP18]], i32* [[N_CASTED]], align 4, !llvm.access.group !29 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !29 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !29 -// CHECK15-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]]), !llvm.access.group !29 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: store i32 [[TMP24]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: store i32 [[TMP26]], i32* [[TMP25]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: store i32 [[TMP28]], i32* [[TMP27]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: store i32 [[TMP30]], i32* [[TMP29]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[TMP31]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store i32* [[TMP6]], i32** [[TMP32]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: store i32 [[TMP34]], i32* [[TMP33]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !29 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 -// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK15-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK15-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 -// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 -// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK15-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK15-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 -// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 -// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 -// CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK15-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK15-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 -// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 -// CHECK15-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 -// CHECK15-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP28]], [[TMP29]] +// CHECK15-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK15-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK15: cond.true11: -// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 // CHECK15-NEXT: br label [[COND_END13:%.*]] // CHECK15: cond.false12: -// CHECK15-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK15-NEXT: br label [[COND_END13]] // CHECK15: cond.end13: -// CHECK15-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP30]], [[COND_TRUE11]] ], [ [[TMP31]], [[COND_FALSE12]] ] +// CHECK15-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE11]] ], [ [[TMP44]], [[COND_FALSE12]] ] // CHECK15-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 -// CHECK15-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 -// CHECK15-NEXT: store i32 [[TMP32]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: store i32 [[TMP45]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) -// CHECK15-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK15-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]]) +// CHECK15-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 +// CHECK15-NEXT: br i1 [[TMP49]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: -// CHECK15-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK15-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP50]], 0 // CHECK15-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK15-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] @@ -8158,16 +8947,15 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -8180,87 +8968,97 @@ // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK15-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 +// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !32 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !32 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !32 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP29]] // CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK15-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK15-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK15-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK15-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK15-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK15-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK15-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -8278,27 +9076,30 @@ // CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8308,82 +9109,100 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !35 -// CHECK15-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !35 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !35 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !35 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 +// CHECK15-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !35 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 +// CHECK15-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !35 +// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !35 +// CHECK15-NEXT: store i32 [[TMP23]], i32* [[TMP22]], align 4, !llvm.access.group !35 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !35 +// CHECK15-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4, !llvm.access.group !35 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !35 +// CHECK15-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 4, !llvm.access.group !35 +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[TMP28]], align 4, !llvm.access.group !35 +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store i32* [[TMP6]], i32** [[TMP29]], align 4, !llvm.access.group !35 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !35 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK15-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK15-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: -// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK15-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK15-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -8396,15 +9215,14 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8417,69 +9235,77 @@ // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP11]], i32 35, i32 [[TMP8]], i32 [[TMP9]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP20]], i32 35, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP22]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK15-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !38 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !38 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP19]] +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !38 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP28]] // CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !38 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK15-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -8487,12 +9313,12 @@ // CHECK15: omp.dispatch.inc: // CHECK15-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK15: omp.dispatch.end: -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK15-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK15-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK15-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK15-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL8]] @@ -8511,33 +9337,35 @@ // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -8547,87 +9375,106 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK15-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !41 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !41 +// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !41 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !41 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !41 -// CHECK15-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !41 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !41 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !41 -// CHECK15-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP20]]), !llvm.access.group !41 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !41 +// CHECK15-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !41 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !41 +// CHECK15-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !41 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !41 +// CHECK15-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4, !llvm.access.group !41 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !41 +// CHECK15-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 4, !llvm.access.group !41 +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !41 +// CHECK15-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 4, !llvm.access.group !41 +// CHECK15-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[TMP30]], align 4, !llvm.access.group !41 +// CHECK15-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store i32* [[TMP6]], i32** [[TMP31]], align 4, !llvm.access.group !41 +// CHECK15-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !41 +// CHECK15-NEXT: store i32 [[TMP33]], i32* [[TMP32]], align 4, !llvm.access.group !41 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !41 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !41 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK15-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !41 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK15-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP36]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP37]]) +// CHECK15-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK15-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: -// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK15-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP40]], 0 // CHECK15-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -8640,16 +9487,15 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -8662,71 +9508,81 @@ // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 [[TMP8]]) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP23]], i32 35, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP25]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !44 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !44 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !44 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !44 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP20]] +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !44 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP31]] // CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 -// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK15-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -8734,12 +9590,12 @@ // CHECK15: omp.dispatch.inc: // CHECK15-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK15: omp.dispatch.end: -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK15-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK15-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK15-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK15-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK15-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK15-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -8906,73 +9762,91 @@ // CHECK15-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !47 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47 +// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !47 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 +// CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !47 +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !47 +// CHECK15-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4, !llvm.access.group !47 +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !47 +// CHECK15-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4, !llvm.access.group !47 +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 4, !llvm.access.group !47 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !47 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !47 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !47 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK15-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK15-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 10, i32* [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -8981,13 +9855,13 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8997,63 +9871,69 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !50 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !50 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !50 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !50 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !50 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP17]] // CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !50 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 -// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK15-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK15-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK15-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 10, i32* [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9065,73 +9945,91 @@ // CHECK15-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.11*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], %struct.anon.11* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !53 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !53 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !53 +// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !53 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53 +// CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !53 +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !53 +// CHECK15-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4, !llvm.access.group !53 +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !53 +// CHECK15-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4, !llvm.access.group !53 +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 4, !llvm.access.group !53 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.12*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !53 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !53 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !53 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK15-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK15-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 10, i32* [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9140,13 +10038,13 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9156,63 +10054,69 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !56 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !56 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !56 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !56 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !56 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP17]] // CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !56 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 -// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK15-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK15-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK15-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 10, i32* [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9225,84 +10129,102 @@ // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 4 // CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.13*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !59 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !59 -// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !59 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !59 +// CHECK15-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !59 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 +// CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !59 +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !59 +// CHECK15-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4, !llvm.access.group !59 +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !59 +// CHECK15-NEXT: store i32 [[TMP17]], i32* [[TMP16]], align 4, !llvm.access.group !59 +// CHECK15-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 4, !llvm.access.group !59 +// CHECK15-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !59 +// CHECK15-NEXT: store i32 [[TMP20]], i32* [[TMP19]], align 4, !llvm.access.group !59 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..27 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !59 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !59 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !59 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK15-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK15-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 10, i32* [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9311,14 +10233,14 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9328,84 +10250,92 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !62 -// CHECK15-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !62 +// CHECK15-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK15-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !62 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !62 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP16]] +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !62 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP24]] // CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !62 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 -// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK15-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK15: omp.dispatch.inc: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK15-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK15-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK15: omp.dispatch.end: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK15-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP13]]) +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK15-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 10, i32* [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9417,73 +10347,91 @@ // CHECK15-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.15*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.15* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.15*, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon.15* [[__CONTEXT]], %struct.anon.15** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.15*, %struct.anon.15** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], %struct.anon.15* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !65 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !65 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !65 +// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !65 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65 +// CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !65 +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !65 +// CHECK15-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4, !llvm.access.group !65 +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !65 +// CHECK15-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4, !llvm.access.group !65 +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 4, !llvm.access.group !65 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.16*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !65 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !65 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !65 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP66:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK15-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK15-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 10, i32* [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9492,13 +10440,13 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.16* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.16*, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9508,51 +10456,57 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon.16* [[__CONTEXT]], %struct.anon.16** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], %struct.anon.16* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK15-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !68 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !68 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !68 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !68 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP12]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !68 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP18]] // CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !68 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 -// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP69:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -9560,9 +10514,9 @@ // CHECK15: omp.dispatch.inc: // CHECK15-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK15: omp.dispatch.end: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK15-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK15-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 10, i32* [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9575,84 +10529,102 @@ // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.17*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.17* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.17*, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon.17* [[__CONTEXT]], %struct.anon.17** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.17*, %struct.anon.17** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], %struct.anon.17* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !71 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !71 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !71 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !71 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !71 -// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !71 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !71 +// CHECK15-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !71 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !71 +// CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !71 +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !71 +// CHECK15-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4, !llvm.access.group !71 +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !71 +// CHECK15-NEXT: store i32 [[TMP17]], i32* [[TMP16]], align 4, !llvm.access.group !71 +// CHECK15-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 4, !llvm.access.group !71 +// CHECK15-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !71 +// CHECK15-NEXT: store i32 [[TMP20]], i32* [[TMP19]], align 4, !llvm.access.group !71 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.18*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !71 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !71 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !71 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP72:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK15-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK15-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 10, i32* [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9661,14 +10633,14 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.18* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.18*, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9678,53 +10650,61 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon.18* [[__CONTEXT]], %struct.anon.18** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.18*, %struct.anon.18** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], %struct.anon.18* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 35, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32 35, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK15-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !74 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !74 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !74 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !74 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP13]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !74 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP21]] // CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !74 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 -// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP75:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -9732,9 +10712,9 @@ // CHECK15: omp.dispatch.inc: // CHECK15-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK15: omp.dispatch.end: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK15-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK15-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 10, i32* [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10127,29 +11107,31 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10159,90 +11141,106 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 -// CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 -// CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !13 -// CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !13 -// CHECK17-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !13 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !13 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 +// CHECK17-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK17-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !13 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !13 +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !13 +// CHECK17-NEXT: store i64 [[TMP25]], i64* [[TMP24]], align 8, !llvm.access.group !13 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !13 +// CHECK17-NEXT: store i64 [[TMP27]], i64* [[TMP26]], align 8, !llvm.access.group !13 +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !13 +// CHECK17-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 8, !llvm.access.group !13 +// CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[TMP30]], align 8, !llvm.access.group !13 +// CHECK17-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i32* [[TMP6]], i32** [[TMP31]], align 8, !llvm.access.group !13 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !13 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK17-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK17-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK17-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 -// CHECK17-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 -// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD9]], i32* [[I3]], align 4 +// CHECK17-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK17-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] +// CHECK17-NEXT: store i32 [[ADD8]], i32* [[I3]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK17: .omp.final.done: // CHECK17-NEXT: br label [[OMP_PRECOND_END]] @@ -10251,15 +11249,14 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10269,97 +11266,104 @@ // CHECK17-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 +// CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK17-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] +// CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 -// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !17 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !17 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !17 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !17 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK17-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK17-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK17-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 -// CHECK17-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 -// CHECK17-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] -// CHECK17-NEXT: store i32 [[ADD12]], i32* [[I5]], align 4 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK17-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 +// CHECK17-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 +// CHECK17-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] +// CHECK17-NEXT: store i32 [[ADD11]], i32* [[I4]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK17: .omp.final.done: // CHECK17-NEXT: br label [[OMP_PRECOND_END]] @@ -10373,29 +11377,31 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10405,90 +11411,106 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 -// CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 -// CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !22 -// CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !22 -// CHECK17-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !22 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !22 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK17-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK17-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !22 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !22 +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !22 +// CHECK17-NEXT: store i64 [[TMP25]], i64* [[TMP24]], align 8, !llvm.access.group !22 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !22 +// CHECK17-NEXT: store i64 [[TMP27]], i64* [[TMP26]], align 8, !llvm.access.group !22 +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !22 +// CHECK17-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 8, !llvm.access.group !22 +// CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[TMP30]], align 8, !llvm.access.group !22 +// CHECK17-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i32* [[TMP6]], i32** [[TMP31]], align 8, !llvm.access.group !22 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !22 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK17-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK17-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK17-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 -// CHECK17-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 -// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD9]], i32* [[I3]], align 4 +// CHECK17-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK17-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] +// CHECK17-NEXT: store i32 [[ADD8]], i32* [[I3]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK17: .omp.final.done: // CHECK17-NEXT: br label [[OMP_PRECOND_END]] @@ -10497,15 +11519,14 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10515,97 +11536,104 @@ // CHECK17-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 +// CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK17-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] +// CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 -// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !25 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !25 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !25 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !25 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK17-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK17-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK17-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 -// CHECK17-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 -// CHECK17-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] -// CHECK17-NEXT: store i32 [[ADD12]], i32* [[I5]], align 4 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK17-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 +// CHECK17-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 +// CHECK17-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] +// CHECK17-NEXT: store i32 [[ADD11]], i32* [[I4]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK17: .omp.final.done: // CHECK17-NEXT: br label [[OMP_PRECOND_END]] @@ -10620,8 +11648,7 @@ // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 @@ -10630,152 +11657,168 @@ // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK17-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK17-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK17-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK17-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] +// CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !28 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK17-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] -// CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !28 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] +// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 -// CHECK17-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 -// CHECK17-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !28 -// CHECK17-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4, !llvm.access.group !28 -// CHECK17-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !28 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !28 -// CHECK17-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4, !llvm.access.group !28 -// CHECK17-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]]), !llvm.access.group !28 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK17-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK17-NEXT: store i64 [[TMP24]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !28 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK17-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK17-NEXT: store i64 [[TMP26]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !28 +// CHECK17-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !28 +// CHECK17-NEXT: store i64 [[TMP28]], i64* [[TMP27]], align 8, !llvm.access.group !28 +// CHECK17-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !28 +// CHECK17-NEXT: store i64 [[TMP30]], i64* [[TMP29]], align 8, !llvm.access.group !28 +// CHECK17-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !28 +// CHECK17-NEXT: store i32 [[TMP32]], i32* [[TMP31]], align 8, !llvm.access.group !28 +// CHECK17-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[TMP33]], align 8, !llvm.access.group !28 +// CHECK17-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: store i32* [[TMP6]], i32** [[TMP34]], align 8, !llvm.access.group !28 +// CHECK17-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !28 +// CHECK17-NEXT: store i32 [[TMP36]], i32* [[TMP35]], align 8, !llvm.access.group !28 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !28 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 -// CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK17-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 -// CHECK17-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK17-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 -// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 -// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 -// CHECK17-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK17-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 -// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 -// CHECK17-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !28 -// CHECK17-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] -// CHECK17-NEXT: br i1 [[CMP13]], label [[COND_TRUE14:%.*]], label [[COND_FALSE15:%.*]] -// CHECK17: cond.true14: -// CHECK17-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !28 -// CHECK17-NEXT: br label [[COND_END16:%.*]] -// CHECK17: cond.false15: -// CHECK17-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 -// CHECK17-NEXT: br label [[COND_END16]] -// CHECK17: cond.end16: -// CHECK17-NEXT: [[COND17:%.*]] = phi i32 [ [[TMP32]], [[COND_TRUE14]] ], [ [[TMP33]], [[COND_FALSE15]] ] -// CHECK17-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 -// CHECK17-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 -// CHECK17-NEXT: store i32 [[TMP34]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK17-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK17-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] +// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK17-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK17-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] +// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK17-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK17-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 +// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK17-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK17-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !28 +// CHECK17-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP43]], [[TMP44]] +// CHECK17-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] +// CHECK17: cond.true11: +// CHECK17-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !28 +// CHECK17-NEXT: br label [[COND_END13:%.*]] +// CHECK17: cond.false12: +// CHECK17-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK17-NEXT: br label [[COND_END13]] +// CHECK17: cond.end13: +// CHECK17-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP45]], [[COND_TRUE11]] ], [ [[TMP46]], [[COND_FALSE12]] ] +// CHECK17-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK17-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK17-NEXT: store i32 [[TMP47]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) -// CHECK17-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 -// CHECK17-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP48:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP49:%.*]] = load i32, i32* [[TMP48]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP49]]) +// CHECK17-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0 +// CHECK17-NEXT: br i1 [[TMP51]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[SUB18:%.*]] = sub nsw i32 [[TMP39]], 0 -// CHECK17-NEXT: [[DIV19:%.*]] = sdiv i32 [[SUB18]], 1 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV19]], 1 -// CHECK17-NEXT: [[ADD20:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD20]], i32* [[I5]], align 4 +// CHECK17-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP52]], 0 +// CHECK17-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 +// CHECK17-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] +// CHECK17-NEXT: store i32 [[ADD17]], i32* [[I4]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK17: .omp.final.done: // CHECK17-NEXT: br label [[OMP_PRECOND_END]] @@ -10784,118 +11827,125 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[I7:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 8 +// CHECK17-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK17-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK17-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK17-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 +// CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK17-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK17-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] +// CHECK17-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 -// CHECK17-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK17-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 +// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I7]], align 4, !llvm.access.group !31 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[I7]], align 4, !llvm.access.group !31 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !31 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !31 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !31 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK17-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK17-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK17-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK17-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1 -// CHECK17-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1 -// CHECK17-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] -// CHECK17-NEXT: store i32 [[ADD14]], i32* [[I7]], align 4 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP35]], 0 +// CHECK17-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 +// CHECK17-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 +// CHECK17-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK17-NEXT: store i32 [[ADD12]], i32* [[I5]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK17: .omp.final.done: // CHECK17-NEXT: br label [[OMP_PRECOND_END]] @@ -10909,29 +11959,31 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10941,90 +11993,106 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 -// CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 -// CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !34 -// CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !34 -// CHECK17-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !34 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !34 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 +// CHECK17-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK17-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !34 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !34 +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !34 +// CHECK17-NEXT: store i64 [[TMP25]], i64* [[TMP24]], align 8, !llvm.access.group !34 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !34 +// CHECK17-NEXT: store i64 [[TMP27]], i64* [[TMP26]], align 8, !llvm.access.group !34 +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !34 +// CHECK17-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 8, !llvm.access.group !34 +// CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[TMP30]], align 8, !llvm.access.group !34 +// CHECK17-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i32* [[TMP6]], i32** [[TMP31]], align 8, !llvm.access.group !34 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !34 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK17-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK17-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK17-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 -// CHECK17-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 -// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD9]], i32* [[I3]], align 4 +// CHECK17-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK17-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] +// CHECK17-NEXT: store i32 [[ADD8]], i32* [[I3]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK17: .omp.final.done: // CHECK17-NEXT: br label [[OMP_PRECOND_END]] @@ -11033,15 +12101,14 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11051,93 +12118,100 @@ // CHECK17-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 +// CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP11]], i32 1073741859, i32 [[TMP8]], i32 [[TMP9]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP20]], i32 1073741859, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP22]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !37 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !37 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !37 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !37 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !37 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK17-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK17-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK17-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP23]], 0 -// CHECK17-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 -// CHECK17-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 -// CHECK17-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK17-NEXT: store i32 [[ADD11]], i32* [[I5]], align 4 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK17-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 +// CHECK17-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 +// CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] +// CHECK17-NEXT: store i32 [[ADD10]], i32* [[I4]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK17: .omp.final.done: // CHECK17-NEXT: br label [[OMP_PRECOND_END]] @@ -11152,8 +12226,7 @@ // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 @@ -11162,127 +12235,143 @@ // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK17-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK17-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK17-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] -// CHECK17-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +// CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 -// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !40 -// CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 -// CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !40 -// CHECK17-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP19]], i32* [[CONV8]], align 4, !llvm.access.group !40 -// CHECK17-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !40 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !40 -// CHECK17-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP21]], i32* [[CONV9]], align 4, !llvm.access.group !40 -// CHECK17-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP22]]), !llvm.access.group !40 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !40 +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !40 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 +// CHECK17-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK17-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !40 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !40 +// CHECK17-NEXT: store i64 [[TMP27]], i64* [[TMP26]], align 8, !llvm.access.group !40 +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !40 +// CHECK17-NEXT: store i64 [[TMP29]], i64* [[TMP28]], align 8, !llvm.access.group !40 +// CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !40 +// CHECK17-NEXT: store i32 [[TMP31]], i32* [[TMP30]], align 8, !llvm.access.group !40 +// CHECK17-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[TMP32]], align 8, !llvm.access.group !40 +// CHECK17-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: store i32* [[TMP6]], i32** [[TMP33]], align 8, !llvm.access.group !40 +// CHECK17-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !40 +// CHECK17-NEXT: store i32 [[TMP35]], i32* [[TMP34]], align 8, !llvm.access.group !40 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !40 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !40 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK17-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK17-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !40 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK17-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) +// CHECK17-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK17-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP29]], 0 -// CHECK17-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV11]], 1 -// CHECK17-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD12]], i32* [[I5]], align 4 +// CHECK17-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP42]], 0 +// CHECK17-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 +// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] +// CHECK17-NEXT: store i32 [[ADD9]], i32* [[I4]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK17: .omp.final.done: // CHECK17-NEXT: br label [[OMP_PRECOND_END]] @@ -11291,115 +12380,122 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[I7:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 8 +// CHECK17-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK17-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK17-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK17-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 +// CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 [[TMP8]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP23]], i32 1073741859, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP25]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 -// CHECK17-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK17-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] +// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I7]], align 4, !llvm.access.group !43 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[I7]], align 4, !llvm.access.group !43 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !43 +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !43 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !43 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK17-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK17-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK17-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 -// CHECK17-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 -// CHECK17-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK17-NEXT: store i32 [[ADD13]], i32* [[I7]], align 4 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP35]], 0 +// CHECK17-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 +// CHECK17-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 +// CHECK17-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] +// CHECK17-NEXT: store i32 [[ADD11]], i32* [[I5]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK17: .omp.final.done: // CHECK17-NEXT: br label [[OMP_PRECOND_END]] @@ -11564,75 +12660,93 @@ // CHECK17-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.12*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !46 +// CHECK17-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !46 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !46 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !46 +// CHECK17-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !46 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !46 +// CHECK17-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !46 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 8, !llvm.access.group !46 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.13*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !46 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK17-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK17-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11641,13 +12755,13 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11657,66 +12771,72 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !49 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !49 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !49 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK17-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK17-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK17-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11728,75 +12848,93 @@ // CHECK17-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !52 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !52 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !52 +// CHECK17-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !52 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !52 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !52 +// CHECK17-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !52 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !52 +// CHECK17-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !52 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 8, !llvm.access.group !52 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.15*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !52 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !52 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !52 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK17-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK17-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11805,13 +12943,13 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.15* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.15*, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11821,66 +12959,72 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.15* [[__CONTEXT]], %struct.anon.15** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.15*, %struct.anon.15** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], %struct.anon.15* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !55 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !55 +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !55 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !55 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !55 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !55 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK17-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK17-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK17-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11893,90 +13037,105 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 8 // CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.16*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.16* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.16*, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK17-NEXT: store %struct.anon.16* [[__CONTEXT]], %struct.anon.16** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], %struct.anon.16* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK17-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 -// CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !58 -// CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58 -// CHECK17-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !58 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !58 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 +// CHECK17-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK17-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !58 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !58 +// CHECK17-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !58 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !58 +// CHECK17-NEXT: store i64 [[TMP19]], i64* [[TMP18]], align 8, !llvm.access.group !58 +// CHECK17-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP20]], align 8, !llvm.access.group !58 +// CHECK17-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !58 +// CHECK17-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 8, !llvm.access.group !58 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.18*)* @.omp_outlined..27 to void (i32*, i32*, ...)*), %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !58 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK17-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK17-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11985,14 +13144,14 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.18* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.18*, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12002,90 +13161,97 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK17-NEXT: store %struct.anon.18* [[__CONTEXT]], %struct.anon.18** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.18*, %struct.anon.18** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], %struct.anon.18* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK17-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK17-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[CONV2]] // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !61 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !61 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !61 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !61 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK17-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] +// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK17-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP13]]) +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK17-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12097,75 +13263,93 @@ // CHECK17-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 8 // CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.20*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.20* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.20*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.20* [[__CONTEXT]], %struct.anon.20** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.20*, %struct.anon.20** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], %struct.anon.20* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !64 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !64 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !64 +// CHECK17-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !64 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !64 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !64 +// CHECK17-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group !64 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !64 +// CHECK17-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !64 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 8, !llvm.access.group !64 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.21*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !64 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !64 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !64 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP65:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK17-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK17-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12174,13 +13358,13 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.21* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.21*, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12190,54 +13374,60 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.21* [[__CONTEXT]], %struct.anon.21** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.21*, %struct.anon.21** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], %struct.anon.21* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK17-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !67 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !67 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !67 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !67 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !67 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !67 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK17-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP68:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -12245,9 +13435,9 @@ // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK17-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK17-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12260,90 +13450,105 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 8 // CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.22*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.22* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.22*, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK17-NEXT: store %struct.anon.22* [[__CONTEXT]], %struct.anon.22** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.22*, %struct.anon.22** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], %struct.anon.22* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK17-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !70 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 -// CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !70 -// CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70 -// CHECK17-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !70 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !70 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !70 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 +// CHECK17-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK17-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !70 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !70 +// CHECK17-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group !70 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !70 +// CHECK17-NEXT: store i64 [[TMP19]], i64* [[TMP18]], align 8, !llvm.access.group !70 +// CHECK17-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP20]], align 8, !llvm.access.group !70 +// CHECK17-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !70 +// CHECK17-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 8, !llvm.access.group !70 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.24*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !70 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !70 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !70 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP71:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK17-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK17-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12352,14 +13557,14 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.24* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.24*, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12369,67 +13574,74 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK17-NEXT: store %struct.anon.24* [[__CONTEXT]], %struct.anon.24** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.24*, %struct.anon.24** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_24:%.*]], %struct.anon.24* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK17-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK17-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 1073741859, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32 1073741859, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK17-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !73 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !73 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !73 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !73 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !73 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !73 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK17-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK17-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP74:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK17-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK17-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12819,27 +14031,30 @@ // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12849,82 +14064,100 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !14 -// CHECK19-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !14 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !14 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !14 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: store i32 [[TMP23]], i32* [[TMP22]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[TMP28]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store i32* [[TMP6]], i32** [[TMP29]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !14 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK19-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK19-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK19-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -12937,15 +14170,14 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12958,86 +14190,94 @@ // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !18 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !18 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP27]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK19-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK19-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK19-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK19-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK19-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK19-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -13055,27 +14295,30 @@ // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13085,82 +14328,100 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !23 -// CHECK19-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !23 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !23 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !23 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: store i32 [[TMP23]], i32* [[TMP22]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[TMP28]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store i32* [[TMP6]], i32** [[TMP29]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !23 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK19-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK19-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK19-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -13173,15 +14434,14 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13194,86 +14454,94 @@ // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !26 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !26 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !26 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP27]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK19-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK19-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK19-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK19-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK19-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK19-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -13292,33 +14560,35 @@ // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -13328,112 +14598,131 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: store i32 [[TMP18]], i32* [[N_CASTED]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]]), !llvm.access.group !29 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: store i32 [[TMP24]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: store i32 [[TMP26]], i32* [[TMP25]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: store i32 [[TMP28]], i32* [[TMP27]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: store i32 [[TMP30]], i32* [[TMP29]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[TMP31]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store i32* [[TMP6]], i32** [[TMP32]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: store i32 [[TMP34]], i32* [[TMP33]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !29 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK19-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK19-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK19-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK19-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK19-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP28]], [[TMP29]] +// CHECK19-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK19-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK19: cond.true11: -// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 // CHECK19-NEXT: br label [[COND_END13:%.*]] // CHECK19: cond.false12: -// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK19-NEXT: br label [[COND_END13]] // CHECK19: cond.end13: -// CHECK19-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP30]], [[COND_TRUE11]] ], [ [[TMP31]], [[COND_FALSE12]] ] +// CHECK19-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE11]] ], [ [[TMP44]], [[COND_FALSE12]] ] // CHECK19-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: store i32 [[TMP32]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: store i32 [[TMP45]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) -// CHECK19-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK19-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]]) +// CHECK19-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 +// CHECK19-NEXT: br i1 [[TMP49]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK19-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP50]], 0 // CHECK19-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK19-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] @@ -13446,16 +14735,15 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -13468,87 +14756,97 @@ // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !32 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !32 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !32 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP29]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK19-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK19-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK19-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK19-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK19-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK19-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -13566,27 +14864,30 @@ // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13596,82 +14897,100 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !35 -// CHECK19-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !35 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !35 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !35 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: store i32 [[TMP23]], i32* [[TMP22]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[TMP28]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store i32* [[TMP6]], i32** [[TMP29]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !35 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK19-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK19-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK19-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -13684,15 +15003,14 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13705,69 +15023,77 @@ // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP11]], i32 1073741859, i32 [[TMP8]], i32 [[TMP9]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP20]], i32 1073741859, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP22]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK19-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !38 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !38 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP19]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !38 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP28]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !38 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK19-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -13775,12 +15101,12 @@ // CHECK19: omp.dispatch.inc: // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK19-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK19-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK19-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK19-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL8]] @@ -13799,33 +15125,35 @@ // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -13835,87 +15163,106 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !41 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !41 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !41 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !41 -// CHECK19-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !41 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !41 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !41 -// CHECK19-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP20]]), !llvm.access.group !41 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[N]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[TMP30]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store i32* [[TMP6]], i32** [[TMP31]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: store i32 [[TMP33]], i32* [[TMP32]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !41 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !41 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK19-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP36]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP37]]) +// CHECK19-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK19-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK19-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP40]], 0 // CHECK19-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -13928,16 +15275,15 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -13950,71 +15296,81 @@ // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], i32* [[N]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 [[TMP8]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP23]], i32 1073741859, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP25]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !44 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !44 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !44 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !44 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP20]] +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !44 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP31]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK19-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -14022,12 +15378,12 @@ // CHECK19: omp.dispatch.inc: // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK19-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK19-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK19-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK19-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK19-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -14194,73 +15550,91 @@ // CHECK19-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !47 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !47 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 +// CHECK19-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !47 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !47 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4, !llvm.access.group !47 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !47 +// CHECK19-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4, !llvm.access.group !47 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 4, !llvm.access.group !47 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !47 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !47 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !47 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK19-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK19-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, i32* [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -14269,13 +15643,13 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -14285,63 +15659,69 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !50 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !50 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !50 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !50 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !50 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP17]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !50 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK19-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK19-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK19-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, i32* [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -14353,73 +15733,91 @@ // CHECK19-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.11*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], %struct.anon.11* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !53 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !53 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !53 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !53 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53 +// CHECK19-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !53 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !53 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4, !llvm.access.group !53 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !53 +// CHECK19-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4, !llvm.access.group !53 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 4, !llvm.access.group !53 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.12*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !53 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !53 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !53 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK19-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK19-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, i32* [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -14428,13 +15826,13 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -14444,63 +15842,69 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !56 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !56 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !56 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !56 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !56 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP17]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !56 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK19-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK19-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK19-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, i32* [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -14513,84 +15917,102 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 4 // CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.13*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !59 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !59 -// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !59 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: store i32 [[TMP17]], i32* [[TMP16]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: store i32 [[TMP20]], i32* [[TMP19]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..27 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !59 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !59 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK19-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, i32* [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -14599,14 +16021,14 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -14616,84 +16038,92 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !62 -// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !62 +// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !62 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !62 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP16]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !62 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP24]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !62 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 -// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK19-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK19-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK19-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK19-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP13]]) +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK19-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, i32* [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -14705,73 +16135,91 @@ // CHECK19-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.15*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.15* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.15*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.15* [[__CONTEXT]], %struct.anon.15** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.15*, %struct.anon.15** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], %struct.anon.15* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !65 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !65 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !65 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !65 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65 +// CHECK19-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !65 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !65 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4, !llvm.access.group !65 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !65 +// CHECK19-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4, !llvm.access.group !65 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 4, !llvm.access.group !65 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.16*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !65 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !65 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !65 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP66:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK19-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK19-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, i32* [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -14780,13 +16228,13 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.16* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.16*, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -14796,51 +16244,57 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.16* [[__CONTEXT]], %struct.anon.16** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], %struct.anon.16* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK19-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !68 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !68 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !68 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !68 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP12]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !68 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP18]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !68 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 -// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK19-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP69:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -14848,9 +16302,9 @@ // CHECK19: omp.dispatch.inc: // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK19-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK19-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, i32* [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -14863,84 +16317,102 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.17*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.17* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.17*, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.17* [[__CONTEXT]], %struct.anon.17** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.17*, %struct.anon.17** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], %struct.anon.17* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !71 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !71 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !71 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !71 -// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !71 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: store i32 [[TMP17]], i32* [[TMP16]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: store i32 [[TMP20]], i32* [[TMP19]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.18*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !71 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !71 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP72:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK19-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, i32* [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -14949,14 +16421,14 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.18* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.18*, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -14966,53 +16438,61 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.18* [[__CONTEXT]], %struct.anon.18** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.18*, %struct.anon.18** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], %struct.anon.18* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 1073741859, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32 1073741859, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK19-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !74 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !74 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !74 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !74 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP13]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !74 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP21]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !74 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 -// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK19-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP75:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -15020,9 +16500,9 @@ // CHECK19: omp.dispatch.inc: // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK19-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, i32* [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/target_teams_distribute_private_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_private_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_private_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_private_codegen.cpp @@ -253,15 +253,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l91 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -276,6 +278,8 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 @@ -291,70 +295,70 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM2]] -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.S* [[ARRAYIDX3]] to i8* -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[SIVAR]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[ARRAYIDX3]] to i8* +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP14]] // CHECK1-NEXT: store i32 [[ADD4]], i32* [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i64 2 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP19]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -434,15 +438,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK1-SAME: () #[[ATTR4]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 @@ -458,6 +464,8 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 @@ -475,67 +483,67 @@ // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP12]] to i64 // CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM4]] -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast %struct.S.0* [[ARRAYIDX5]] to i8* -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[TMP10]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[ARRAYIDX5]] to i8* +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[TMP11]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN7]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] @@ -741,15 +749,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l91 // CHECK3-SAME: () #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -764,6 +774,8 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 @@ -779,68 +791,68 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP10]] -// CHECK3-NEXT: [[TMP11:%.*]] = bitcast %struct.S* [[ARRAYIDX2]] to i8* -// CHECK3-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 4, i1 false) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[SIVAR]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP10]] +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[ARRAYIDX2]] to i8* +// CHECK3-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], [[TMP14]] // CHECK3-NEXT: store i32 [[ADD3]], i32* [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN5]], i32 2 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN5]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP19]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] @@ -920,15 +932,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK3-SAME: () #[[ATTR4]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 @@ -944,6 +958,8 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 @@ -961,65 +977,65 @@ // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK3-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: [[TMP12:%.*]] = bitcast %struct.S.0* [[ARRAYIDX4]] to i8* -// CHECK3-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[TMP10]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP10]] +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[ARRAYIDX4]] to i8* +// CHECK3-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[TMP11]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 4, i1 false) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) // CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN6]], i32 2 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -1217,15 +1233,17 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l74 // CHECK9-SAME: () #[[ATTR5:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 @@ -1241,63 +1259,65 @@ // CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32* undef, i32** [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK9-NEXT: store i32* [[G1]], i32** [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK9-NEXT: store i32 1, i32* [[G]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[_TMP2]], align 8 -// CHECK9-NEXT: store volatile i32 1, i32* [[TMP8]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK9-NEXT: store volatile i32 1, i32* [[TMP9]], align 4 // CHECK9-NEXT: store i32 2, i32* [[SIVAR]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store i32* [[G]], i32** [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[_TMP2]], align 8 -// CHECK9-NEXT: store i32* [[TMP11]], i32** [[TMP10]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[G]], i32** [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK9-NEXT: store i32* [[TMP12]], i32** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[TMP13]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp @@ -113,19 +113,22 @@ // CHECK1-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[TMP0]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -136,76 +139,78 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[SIVAR1]], align 4 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[SIVAR1]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast i32* [[SIVAR1]] to i8* -// CHECK1-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP14]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -272,19 +277,22 @@ // CHECK1-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[TMP0]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -295,76 +303,78 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[T_VAR1]], align 4 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[T_VAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast i32* [[T_VAR1]] to i8* -// CHECK1-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP14]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i32* [[T_VAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -436,19 +446,22 @@ // CHECK3-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[TMP0]], i32** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -459,76 +472,78 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[SIVAR1]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[SIVAR1]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = bitcast i32* [[SIVAR1]] to i8* -// CHECK3-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, i8* [[TMP14]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP15:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK3-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i32 4, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -595,19 +610,22 @@ // CHECK3-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[TMP0]], i32** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -618,76 +636,78 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[T_VAR1]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[T_VAR1]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[T_VAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = bitcast i32* [[T_VAR1]] to i8* -// CHECK3-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 4, i8* [[TMP14]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP15:%.*]] = bitcast i32* [[T_VAR]] to i8* +// CHECK3-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 4, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -738,19 +758,22 @@ // CHECK9-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[TMP0]], i32** [[TMP1]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -762,79 +785,81 @@ // CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[SIVAR1]], align 4 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[SIVAR]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9: omp.inner.for.body: +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[SIVAR1]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store i32* [[SIVAR1]], i32** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK9-NEXT: store i32 [[ADD2]], i32* [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[TMP13]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK9-NEXT: [[TMP14:%.*]] = bitcast i32* [[SIVAR1]] to i8* -// CHECK9-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP15]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP16:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK9-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK9-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP17]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK9-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK9-NEXT: store i32 [[ADD4]], i32* [[TMP2]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.case2: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP19]] monotonic, align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP21]] monotonic, align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.default: // CHECK9-NEXT: ret void diff --git a/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp @@ -313,7 +313,7 @@ // CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_CASTED7:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS9:%.*]] = alloca [1 x i8*], align 8 @@ -389,13 +389,13 @@ // CHECK1-NEXT: store i8* null, i8** [[TMP29]], align 8 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 0 // CHECK1-NEXT: [[TMP33:%.*]] = load i16, i16* [[AA]], align 2 // CHECK1-NEXT: store i16 [[TMP33]], i16* [[TMP32]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 1 // CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: store i32 [[TMP35]], i32* [[TMP34]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 2 // CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 [[TMP37]], i32* [[TMP36]], align 4 // CHECK1-NEXT: [[TMP38:%.*]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i64 120, i64 12, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*), i64 -1) @@ -403,10 +403,10 @@ // CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP39]], i32 0, i32 0 // CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP40]], i32 0, i32 0 // CHECK1-NEXT: [[TMP42:%.*]] = load i8*, i8** [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* +// CHECK1-NEXT: [[TMP43:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP42]], i8* align 4 [[TMP43]], i64 12, i1 false) // CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP39]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP42]] to %struct.anon* +// CHECK1-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP42]] to %struct.anon.0* // CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP44]], i32 0, i32 0 // CHECK1-NEXT: [[TMP47:%.*]] = bitcast [3 x i8*]* [[TMP46]] to i8* // CHECK1-NEXT: [[TMP48:%.*]] = bitcast i8** [[TMP30]] to i8* @@ -609,7 +609,7 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -620,20 +620,20 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP4]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 2 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -643,54 +643,57 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK1-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK1-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 10, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -735,7 +738,7 @@ // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i16*, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca [3 x i8*]*, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca [3 x i8*]*, align 8 @@ -753,7 +756,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -766,8 +769,8 @@ // CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !26 // CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !26 // CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !26 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !26 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !26 +// CHECK1-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !26 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !26 // CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !26 // CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !26 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @@ -779,8 +782,8 @@ // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i64 0, i64 0 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP18]], i64 0, i64 0 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP12]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP23]], align 4 // CHECK1-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10) #[[ATTR4]] // CHECK1-NEXT: [[TMP26:%.*]] = call i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l97.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* [[TMP22]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 [[TMP25]], i32 1, i32 0, i8* null, i32 0, i8* null) #[[ATTR4]] @@ -809,23 +812,23 @@ // CHECK1-SAME: (i64 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -835,59 +838,62 @@ // CHECK1-NEXT: [[A1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[A1]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[A1]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[A1]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: store i32 10, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 10, i32* [[A]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void @@ -897,23 +903,23 @@ // CHECK1-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 2 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK1-NEXT: store i16 [[TMP1]], i16* [[TMP0]], align 2 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -923,59 +929,62 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK1-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !29 -// CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !29 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP11:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !29 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 +// CHECK1-NEXT: store i16 [[CONV3]], i16* [[AA]], align 2, !llvm.access.group !29 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 10, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -988,31 +997,29 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK1-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1022,64 +1029,68 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !32 -// CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 -// CHECK1-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !32 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK1-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2, !llvm.access.group !32 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 10, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1099,7 +1110,7 @@ // CHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -1118,28 +1129,36 @@ // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 9, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, [10 x float]*, i64, float*, [5 x [10 x double]]*, i64, i64, double*, %struct.TT*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], [10 x float]* [[TMP0]], i64 [[TMP1]], float* [[TMP2]], [5 x [10 x double]]* [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], double* [[TMP6]], %struct.TT* [[TMP7]]) +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store [10 x float]* [[TMP0]], [10 x float]** [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[TMP2]], float** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store [5 x [10 x double]]* [[TMP3]], [5 x [10 x double]]** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: store i64 [[TMP5]], i64* [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK1-NEXT: store double* [[TMP6]], double** [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK1-NEXT: store %struct.TT* [[TMP7]], %struct.TT** [[TMP17]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1149,108 +1168,111 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8 -// CHECK1-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8 -// CHECK1-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 0 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load [10 x float]*, [10 x float]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP16:%.*]] = load double*, double** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load %struct.TT*, %struct.TT** [[TMP17]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i64 0, i64 0 // CHECK1-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[ARRAYDECAY]], i64 16) ] // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK1-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[CONV7:%.*]] = fpext float [[TMP17]] to double -// CHECK1-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 -// CHECK1-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK1-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK1-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[CONV11:%.*]] = fpext float [[TMP18]] to double -// CHECK1-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK1-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK1-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !35 -// CHECK1-NEXT: [[ADD16:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !35 -// CHECK1-NEXT: [[TMP20:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP20]] -// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i64 3 -// CHECK1-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !35 -// CHECK1-NEXT: [[ADD19:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !35 -// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !35 -// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK1-NEXT: store i64 [[ADD20]], i64* [[X]], align 8, !llvm.access.group !35 -// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !35 -// CHECK1-NEXT: [[CONV21:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK1-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK1-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK1-NEXT: store i8 [[CONV23]], i8* [[Y]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP28:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP28]] to double +// CHECK1-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK1-NEXT: [[CONV4:%.*]] = fptrunc double [[ADD3]] to float +// CHECK1-NEXT: store float [[CONV4]], float* [[ARRAYIDX]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 3 +// CHECK1-NEXT: [[TMP29:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[CONV6:%.*]] = fpext float [[TMP29]] to double +// CHECK1-NEXT: [[ADD7:%.*]] = fadd double [[CONV6]], 1.000000e+00 +// CHECK1-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float +// CHECK1-NEXT: store float [[CONV8]], float* [[ARRAYIDX5]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP10]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX9]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP30:%.*]] = load double, double* [[ARRAYIDX10]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: [[ADD11:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD11]], double* [[ARRAYIDX10]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP31:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP16]], i64 [[TMP31]] +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX12]], i64 3 +// CHECK1-NEXT: [[TMP32:%.*]] = load double, double* [[ARRAYIDX13]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: [[ADD14:%.*]] = fadd double [[TMP32]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD14]], double* [[ARRAYIDX13]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP33:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP33]], 1 +// CHECK1-NEXT: store i64 [[ADD15]], i64* [[X]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP34:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: [[CONV16:%.*]] = sext i8 [[TMP34]] to i32 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV16]], 1 +// CHECK1-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK1-NEXT: store i8 [[CONV18]], i8* [[Y]], align 8, !llvm.access.group !35 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK1-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK1-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 10, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1596,7 +1618,7 @@ // CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -1607,24 +1629,28 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.S1* [[TMP0]], %struct.S1** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store i16* [[TMP3]], i16** [[TMP9]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1634,76 +1660,79 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i16*, i16** [[TMP9]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !38 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !38 -// CHECK1-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double -// CHECK1-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: store double [[ADD5]], double* [[A]], align 8, !llvm.access.group !38 -// CHECK1-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load double, double* [[A6]], align 8, !llvm.access.group !38 -// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK1-NEXT: store double [[INC]], double* [[A6]], align 8, !llvm.access.group !38 -// CHECK1-NEXT: [[CONV7:%.*]] = fptosi double [[INC]] to i16 -// CHECK1-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP14]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 -// CHECK1-NEXT: store i16 [[CONV7]], i16* [[ARRAYIDX8]], align 2, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[B]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double +// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: store double [[ADD2]], double* [[A]], align 8, !llvm.access.group !38 +// CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = load double, double* [[A3]], align 8, !llvm.access.group !38 +// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK1-NEXT: store double [[INC]], double* [[A3]], align 8, !llvm.access.group !38 +// CHECK1-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// CHECK1-NEXT: [[TMP21:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i64 [[TMP21]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// CHECK1-NEXT: store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2, !llvm.access.group !38 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 10, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1719,10 +1748,7 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 @@ -1733,159 +1759,163 @@ // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 -// CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 -// CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* -// CHECK1-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], [10 x i32]* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, i16* [[CONV2]], align 2 +// CHECK1-NEXT: store i16 [[TMP6]], i16* [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV3]], align 1 +// CHECK1-NEXT: store i8 [[TMP8]], i8* [[TMP7]], align 2 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP9]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[N:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I8:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 -// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] -// CHECK1-NEXT: [[SUB6:%.*]] = sub i32 [[SUB]], 1 -// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB6]], 1 +// CHECK1-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[N]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, i16* [[TMP5]], align 8 +// CHECK1-NEXT: store i16 [[TMP6]], i16* [[AA]], align 2 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP7]], align 2 +// CHECK1-NEXT: store i8 [[TMP8]], i8* [[AAA]], align 1 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 -// CHECK1-NEXT: [[SUB7:%.*]] = sub i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB7]], i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 -// CHECK1-NEXT: [[ADD10:%.*]] = add i32 [[TMP17]], 1 -// CHECK1-NEXT: [[CMP11:%.*]] = icmp ult i32 [[TMP16]], [[ADD10]] -// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] +// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !41 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4, !llvm.access.group !41 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !41 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 4, !llvm.access.group !41 -// CHECK1-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2, !llvm.access.group !41 -// CHECK1-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 -// CHECK1-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK1-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2, !llvm.access.group !41 -// CHECK1-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1, !llvm.access.group !41 -// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 -// CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK1-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1, !llvm.access.group !41 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !41 -// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK1-NEXT: store i32 [[ADD20]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP28]], [[MUL]] +// CHECK1-NEXT: store i32 [[ADD9]], i32* [[I5]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[ADD10]], i32* [[A]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP31:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !41 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK1-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 +// CHECK1-NEXT: store i16 [[CONV12]], i16* [[AA]], align 2, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP32:%.*]] = load i8, i8* [[AAA]], align 1, !llvm.access.group !41 +// CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 +// CHECK1-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 +// CHECK1-NEXT: store i8 [[CONV15]], i8* [[AAA]], align 1, !llvm.access.group !41 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP10]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK1-NEXT: store i32 [[ADD16]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !41 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK1-NEXT: [[ADD21:%.*]] = add i32 [[TMP24]], 1 -// CHECK1-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 +// CHECK1-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK1-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB22:%.*]] = sub i32 [[TMP30]], [[TMP31]] -// CHECK1-NEXT: [[SUB23:%.*]] = sub i32 [[SUB22]], 1 -// CHECK1-NEXT: [[ADD24:%.*]] = add i32 [[SUB23]], 1 -// CHECK1-NEXT: [[DIV25:%.*]] = udiv i32 [[ADD24]], 1 -// CHECK1-NEXT: [[MUL26:%.*]] = mul i32 [[DIV25]], 1 -// CHECK1-NEXT: [[ADD27:%.*]] = add i32 [[TMP29]], [[MUL26]] -// CHECK1-NEXT: store i32 [[ADD27]], i32* [[I8]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB18:%.*]] = sub i32 [[TMP40]], [[TMP41]] +// CHECK1-NEXT: [[SUB19:%.*]] = sub i32 [[SUB18]], 1 +// CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[SUB19]], 1 +// CHECK1-NEXT: [[DIV21:%.*]] = udiv i32 [[ADD20]], 1 +// CHECK1-NEXT: [[MUL22:%.*]] = mul i32 [[DIV21]], 1 +// CHECK1-NEXT: [[ADD23:%.*]] = add i32 [[TMP39]], [[MUL22]] +// CHECK1-NEXT: store i32 [[ADD23]], i32* [[I5]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -1899,34 +1929,33 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP5]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1936,70 +1965,74 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !44 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !44 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !44 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !44 -// CHECK1-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !44 -// CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 -// CHECK1-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !44 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP16:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !44 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK1-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2, !llvm.access.group !44 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 10, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2034,7 +2067,7 @@ // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_CASTED4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS6:%.*]] = alloca [1 x i8*], align 4 @@ -2106,13 +2139,13 @@ // CHECK3-NEXT: store i8* null, i8** [[TMP27]], align 4 // CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 0 // CHECK3-NEXT: [[TMP31:%.*]] = load i16, i16* [[AA]], align 2 // CHECK3-NEXT: store i16 [[TMP31]], i16* [[TMP30]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 1 // CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: store i32 [[TMP33]], i32* [[TMP32]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 2 // CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 [[TMP35]], i32* [[TMP34]], align 4 // CHECK3-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 72, i32 12, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*), i64 -1) @@ -2120,10 +2153,10 @@ // CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0 // CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP38]], i32 0, i32 0 // CHECK3-NEXT: [[TMP40:%.*]] = load i8*, i8** [[TMP39]], align 4 -// CHECK3-NEXT: [[TMP41:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* +// CHECK3-NEXT: [[TMP41:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8* // CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP40]], i8* align 4 [[TMP41]], i32 12, i1 false) // CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP43:%.*]] = bitcast i8* [[TMP40]] to %struct.anon* +// CHECK3-NEXT: [[TMP43:%.*]] = bitcast i8* [[TMP40]] to %struct.anon.0* // CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP42]], i32 0, i32 0 // CHECK3-NEXT: [[TMP45:%.*]] = bitcast [3 x i64]* [[TMP44]] to i8* // CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP45]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false) @@ -2325,7 +2358,7 @@ // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -2334,20 +2367,20 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined. to void (i32*, i32*, ...)*), i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 2 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2357,54 +2390,57 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK3-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK3-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 10, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2449,7 +2485,7 @@ // CHECK3-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 4 // CHECK3-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 4 // CHECK3-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i16*, align 4 // CHECK3-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca [3 x i8*]*, align 4 // CHECK3-NEXT: [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca [3 x i8*]*, align 4 @@ -2467,7 +2503,7 @@ // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK3-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -2480,8 +2516,8 @@ // CHECK3-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !27 // CHECK3-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !27 // CHECK3-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !27 -// CHECK3-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !27 -// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !27 +// CHECK3-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 4, !noalias !27 +// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 4, !noalias !27 // CHECK3-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !27 // CHECK3-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !27 // CHECK3-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @@ -2493,8 +2529,8 @@ // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i32 0, i32 0 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP18]], i32 0, i32 0 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP12]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 2 // CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP23]], align 4 // CHECK3-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10) #[[ATTR4]] // CHECK3-NEXT: [[TMP26:%.*]] = call i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l97.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* [[TMP22]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 [[TMP25]], i32 1, i32 0, i8* null, i32 0, i8* null) #[[ATTR4]] @@ -2521,21 +2557,22 @@ // CHECK3-SAME: (i32 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2545,58 +2582,62 @@ // CHECK3-NEXT: [[A1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[A1]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[A1]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[A1]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK3-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: store i32 10, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 10, i32* [[A]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: ret void @@ -2606,23 +2647,23 @@ // CHECK3-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 2 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK3-NEXT: store i16 [[TMP1]], i16* [[TMP0]], align 2 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2632,59 +2673,62 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK3-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !30 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !30 -// CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !30 -// CHECK3-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 -// CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK3-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !30 +// CHECK3-NEXT: [[TMP11:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !30 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK3-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 +// CHECK3-NEXT: store i16 [[CONV3]], i16* [[AA]], align 2, !llvm.access.group !30 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK3-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 10, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2697,29 +2741,28 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP1]], i32 [[TMP3]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK3-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2729,63 +2772,68 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !33 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !33 -// CHECK3-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !33 -// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 -// CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !33 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: [[TMP14:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !33 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK3-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2, !llvm.access.group !33 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 10, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2805,7 +2853,7 @@ // CHECK3-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -2823,27 +2871,36 @@ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 9, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [10 x float]*, i32, float*, [5 x [10 x double]]*, i32, i32, double*, %struct.TT*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP9]], [10 x float]* [[TMP0]], i32 [[TMP1]], float* [[TMP2]], [5 x [10 x double]]* [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], double* [[TMP6]], %struct.TT* [[TMP7]]) +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store [10 x float]* [[TMP0]], [10 x float]** [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[TMP2]], float** [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store [5 x [10 x double]]* [[TMP3]], [5 x [10 x double]]** [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK3-NEXT: store double* [[TMP6]], double** [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK3-NEXT: store %struct.TT* [[TMP7]], %struct.TT** [[TMP17]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2853,107 +2910,111 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4 -// CHECK3-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4 -// CHECK3-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 -// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load [10 x float]*, [10 x float]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 7 +// CHECK3-NEXT: [[TMP16:%.*]] = load double*, double** [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load %struct.TT*, %struct.TT** [[TMP17]], align 4 +// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[ARRAYDECAY]], i32 16) ] // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !36 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !36 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !36 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK3-NEXT: store i32 [[ADD6]], i32* [[A_ADDR]], align 4, !llvm.access.group !36 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !36 -// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP17]] to double -// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP28:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP28]] to double +// CHECK3-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK3-NEXT: [[CONV4:%.*]] = fptrunc double [[ADD3]] to float +// CHECK3-NEXT: store float [[CONV4]], float* [[ARRAYIDX]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP8]], i32 3 +// CHECK3-NEXT: [[TMP29:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[CONV6:%.*]] = fpext float [[TMP29]] to double +// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[CONV6]], 1.000000e+00 // CHECK3-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float -// CHECK3-NEXT: store float [[CONV8]], float* [[ARRAYIDX]], align 4, !llvm.access.group !36 -// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK3-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX9]], align 4, !llvm.access.group !36 -// CHECK3-NEXT: [[CONV10:%.*]] = fpext float [[TMP18]] to double -// CHECK3-NEXT: [[ADD11:%.*]] = fadd double [[CONV10]], 1.000000e+00 -// CHECK3-NEXT: [[CONV12:%.*]] = fptrunc double [[ADD11]] to float -// CHECK3-NEXT: store float [[CONV12]], float* [[ARRAYIDX9]], align 4, !llvm.access.group !36 -// CHECK3-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !36 -// CHECK3-NEXT: [[ADD15:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8, !llvm.access.group !36 -// CHECK3-NEXT: [[TMP20:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK3-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP20]] -// CHECK3-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX16]], i32 3 -// CHECK3-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !36 -// CHECK3-NEXT: [[ADD18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !36 -// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !36 -// CHECK3-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK3-NEXT: store i64 [[ADD19]], i64* [[X]], align 4, !llvm.access.group !36 -// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !36 -// CHECK3-NEXT: [[CONV20:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK3-NEXT: [[ADD21:%.*]] = add nsw i32 [[CONV20]], 1 -// CHECK3-NEXT: [[CONV22:%.*]] = trunc i32 [[ADD21]] to i8 -// CHECK3-NEXT: store i8 [[CONV22]], i8* [[Y]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: store float [[CONV8]], float* [[ARRAYIDX5]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP10]], i32 0, i32 1 +// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX9]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP30:%.*]] = load double, double* [[ARRAYIDX10]], align 8, !llvm.access.group !36 +// CHECK3-NEXT: [[ADD11:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD11]], double* [[ARRAYIDX10]], align 8, !llvm.access.group !36 +// CHECK3-NEXT: [[TMP31:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK3-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP16]], i32 [[TMP31]] +// CHECK3-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX12]], i32 3 +// CHECK3-NEXT: [[TMP32:%.*]] = load double, double* [[ARRAYIDX13]], align 8, !llvm.access.group !36 +// CHECK3-NEXT: [[ADD14:%.*]] = fadd double [[TMP32]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD14]], double* [[ARRAYIDX13]], align 8, !llvm.access.group !36 +// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP18]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP33:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP33]], 1 +// CHECK3-NEXT: store i64 [[ADD15]], i64* [[X]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP18]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP34:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[CONV16:%.*]] = sext i8 [[TMP34]] to i32 +// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV16]], 1 +// CHECK3-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK3-NEXT: store i8 [[CONV18]], i8* [[Y]], align 4, !llvm.access.group !36 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK3-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK3-NEXT: store i32 [[ADD23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK3-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]]) -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK3-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 10, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3295,7 +3356,7 @@ // CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -3305,23 +3366,28 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.S1* [[TMP0]], %struct.S1** [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store i16* [[TMP3]], i16** [[TMP9]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3331,75 +3397,79 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i16*, i16** [[TMP9]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[B_ADDR]], align 4, !llvm.access.group !39 -// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double -// CHECK3-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: store double [[ADD4]], double* [[A]], align 4, !llvm.access.group !39 -// CHECK3-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load double, double* [[A5]], align 4, !llvm.access.group !39 -// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK3-NEXT: store double [[INC]], double* [[A5]], align 4, !llvm.access.group !39 -// CHECK3-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 -// CHECK3-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP14]] -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 -// CHECK3-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2, !llvm.access.group !39 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[B]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double +// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: store double [[ADD2]], double* [[A]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP20:%.*]] = load double, double* [[A3]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK3-NEXT: store double [[INC]], double* [[A3]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// CHECK3-NEXT: [[TMP21:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i32 [[TMP21]] +// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// CHECK3-NEXT: store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2, !llvm.access.group !39 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 10, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3415,10 +3485,7 @@ // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 @@ -3427,155 +3494,163 @@ // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* // CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 -// CHECK3-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* -// CHECK3-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]], [10 x i32]* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK3-NEXT: store i16 [[TMP6]], i16* [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV1]], align 1 +// CHECK3-NEXT: store i8 [[TMP8]], i8* [[TMP7]], align 2 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP9]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[N:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 -// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] -// CHECK3-NEXT: [[SUB4:%.*]] = sub i32 [[SUB]], 1 -// CHECK3-NEXT: [[ADD:%.*]] = add i32 [[SUB4]], 1 +// CHECK3-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[N]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, i16* [[TMP5]], align 4 +// CHECK3-NEXT: store i16 [[TMP6]], i16* [[AA]], align 2 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP7]], align 2 +// CHECK3-NEXT: store i8 [[TMP8]], i8* [[AAA]], align 1 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK3-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 -// CHECK3-NEXT: [[SUB5:%.*]] = sub i32 [[DIV]], 1 -// CHECK3-NEXT: store i32 [[SUB5]], i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK3-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !42 -// CHECK3-NEXT: [[ADD8:%.*]] = add i32 [[TMP17]], 1 -// CHECK3-NEXT: [[CMP9:%.*]] = icmp ult i32 [[TMP16]], [[ADD8]] -// CHECK3-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK3-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] +// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !42 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK3-NEXT: [[ADD10:%.*]] = add i32 [[TMP18]], [[MUL]] -// CHECK3-NEXT: store i32 [[ADD10]], i32* [[I6]], align 4, !llvm.access.group !42 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !42 -// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK3-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4, !llvm.access.group !42 -// CHECK3-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !42 -// CHECK3-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 -// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 -// CHECK3-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK3-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !42 -// CHECK3-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1, !llvm.access.group !42 -// CHECK3-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 -// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 -// CHECK3-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK3-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1, !llvm.access.group !42 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !42 -// CHECK3-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK3-NEXT: store i32 [[ADD18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK3-NEXT: [[ADD9:%.*]] = add i32 [[TMP28]], [[MUL]] +// CHECK3-NEXT: store i32 [[ADD9]], i32* [[I5]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD10]], i32* [[A]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: [[TMP31:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !42 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 +// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK3-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 +// CHECK3-NEXT: store i16 [[CONV12]], i16* [[AA]], align 2, !llvm.access.group !42 +// CHECK3-NEXT: [[TMP32:%.*]] = load i8, i8* [[AAA]], align 1, !llvm.access.group !42 +// CHECK3-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 +// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 +// CHECK3-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 +// CHECK3-NEXT: store i8 [[CONV15]], i8* [[AAA]], align 1, !llvm.access.group !42 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP10]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK3-NEXT: store i32 [[ADD16]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !42 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK3-NEXT: [[ADD19:%.*]] = add i32 [[TMP24]], 1 -// CHECK3-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 +// CHECK3-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK3-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK3-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB20:%.*]] = sub i32 [[TMP30]], [[TMP31]] -// CHECK3-NEXT: [[SUB21:%.*]] = sub i32 [[SUB20]], 1 -// CHECK3-NEXT: [[ADD22:%.*]] = add i32 [[SUB21]], 1 -// CHECK3-NEXT: [[DIV23:%.*]] = udiv i32 [[ADD22]], 1 -// CHECK3-NEXT: [[MUL24:%.*]] = mul i32 [[DIV23]], 1 -// CHECK3-NEXT: [[ADD25:%.*]] = add i32 [[TMP29]], [[MUL24]] -// CHECK3-NEXT: store i32 [[ADD25]], i32* [[I6]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB18:%.*]] = sub i32 [[TMP40]], [[TMP41]] +// CHECK3-NEXT: [[SUB19:%.*]] = sub i32 [[SUB18]], 1 +// CHECK3-NEXT: [[ADD20:%.*]] = add i32 [[SUB19]], 1 +// CHECK3-NEXT: [[DIV21:%.*]] = udiv i32 [[ADD20]], 1 +// CHECK3-NEXT: [[MUL22:%.*]] = mul i32 [[DIV21]], 1 +// CHECK3-NEXT: [[ADD23:%.*]] = add i32 [[TMP39]], [[MUL22]] +// CHECK3-NEXT: store i32 [[ADD23]], i32* [[I5]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: br label [[OMP_PRECOND_END]] @@ -3589,32 +3664,32 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP5]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3624,69 +3699,74 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !45 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !45 -// CHECK3-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !45 -// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 -// CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !45 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !45 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !45 +// CHECK3-NEXT: [[TMP16:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !45 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK3-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2, !llvm.access.group !45 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 10, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3721,7 +3801,7 @@ // CHECK5-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8 // CHECK5-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8 // CHECK5-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8 -// CHECK5-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK5-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[AA_CASTED7:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOFFLOAD_BASEPTRS9:%.*]] = alloca [1 x i8*], align 8 @@ -3797,13 +3877,13 @@ // CHECK5-NEXT: store i8* null, i8** [[TMP29]], align 8 // CHECK5-NEXT: [[TMP30:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP31:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 0 // CHECK5-NEXT: [[TMP33:%.*]] = load i16, i16* [[AA]], align 2 // CHECK5-NEXT: store i16 [[TMP33]], i16* [[TMP32]], align 4 -// CHECK5-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 1 // CHECK5-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK5-NEXT: store i32 [[TMP35]], i32* [[TMP34]], align 4 -// CHECK5-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 2 // CHECK5-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK5-NEXT: store i32 [[TMP37]], i32* [[TMP36]], align 4 // CHECK5-NEXT: [[TMP38:%.*]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i64 120, i64 12, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*), i64 -1) @@ -3811,10 +3891,10 @@ // CHECK5-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP39]], i32 0, i32 0 // CHECK5-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP40]], i32 0, i32 0 // CHECK5-NEXT: [[TMP42:%.*]] = load i8*, i8** [[TMP41]], align 8 -// CHECK5-NEXT: [[TMP43:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* +// CHECK5-NEXT: [[TMP43:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8* // CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP42]], i8* align 4 [[TMP43]], i64 12, i1 false) // CHECK5-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP39]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP42]] to %struct.anon* +// CHECK5-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP42]] to %struct.anon.0* // CHECK5-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP44]], i32 0, i32 0 // CHECK5-NEXT: [[TMP47:%.*]] = bitcast [3 x i8*]* [[TMP46]] to i8* // CHECK5-NEXT: [[TMP48:%.*]] = bitcast i8** [[TMP30]] to i8* @@ -4017,7 +4097,7 @@ // CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) // CHECK5-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -4028,20 +4108,20 @@ // CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK5-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK5-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP4]]) +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK5-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 2 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK5-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4051,54 +4131,57 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK5-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK5-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK5-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK5-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 10, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4143,7 +4226,7 @@ // CHECK5-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK5-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK5-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK5-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // CHECK5-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i16*, align 8 // CHECK5-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca [3 x i8*]*, align 8 // CHECK5-NEXT: [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca [3 x i8*]*, align 8 @@ -4161,7 +4244,7 @@ // CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK5-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK5-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK5-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK5-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK5-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -4174,8 +4257,8 @@ // CHECK5-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !26 // CHECK5-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !26 // CHECK5-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !26 -// CHECK5-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !26 -// CHECK5-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !26 +// CHECK5-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !26 +// CHECK5-NEXT: [[TMP12:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !26 // CHECK5-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !26 // CHECK5-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !26 // CHECK5-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @@ -4187,8 +4270,8 @@ // CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i64 0, i64 0 // CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP18]], i64 0, i64 0 // CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i64 0, i64 0 -// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP12]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 2 // CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP23]], align 4 // CHECK5-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10) #[[ATTR4]] // CHECK5-NEXT: [[TMP26:%.*]] = call i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l97.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* [[TMP22]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 [[TMP25]], i32 1, i32 0, i8* null, i32 0, i8* null) #[[ATTR4]] @@ -4217,23 +4300,23 @@ // CHECK5-SAME: (i64 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK5-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4243,59 +4326,62 @@ // CHECK5-NEXT: [[A1:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK5-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[A1]], align 4, !nontemporal !27 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[A1]], align 4, !nontemporal !27 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[A1]], align 4, !nontemporal !27 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK5-NEXT: store i32 [[ADD3]], i32* [[A1]], align 4, !nontemporal !27 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK5-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK5-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: -// CHECK5-NEXT: store i32 10, i32* [[CONV]], align 4 +// CHECK5-NEXT: store i32 10, i32* [[A]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void @@ -4305,23 +4391,23 @@ // CHECK5-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR2]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 2 // CHECK5-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK5-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK5-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK5-NEXT: store i16 [[TMP1]], i16* [[TMP0]], align 2 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK5-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4331,59 +4417,62 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK5-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK5-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !30 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !30 -// CHECK5-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !30 -// CHECK5-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 -// CHECK5-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK5-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !30 +// CHECK5-NEXT: [[TMP11:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !30 +// CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK5-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 +// CHECK5-NEXT: store i16 [[CONV3]], i16* [[AA]], align 2, !llvm.access.group !30 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK5-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 10, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4396,31 +4485,29 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK5-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK5-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 -// CHECK5-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP3]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK5-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK5-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4430,64 +4517,68 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK5-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK5-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] -// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !33 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !33 -// CHECK5-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !33 -// CHECK5-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 -// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 -// CHECK5-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK5-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !33 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !33 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !33 +// CHECK5-NEXT: [[TMP14:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !33 +// CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK5-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK5-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2, !llvm.access.group !33 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK5-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 10, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4507,7 +4598,7 @@ // CHECK5-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK5-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 // CHECK5-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -4526,28 +4617,36 @@ // CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK5-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK5-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 9, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, [10 x float]*, i64, float*, [5 x [10 x double]]*, i64, i64, double*, %struct.TT*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], [10 x float]* [[TMP0]], i64 [[TMP1]], float* [[TMP2]], [5 x [10 x double]]* [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], double* [[TMP6]], %struct.TT* [[TMP7]]) +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK5-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store [10 x float]* [[TMP0]], [10 x float]** [[TMP10]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store i64 [[TMP1]], i64* [[TMP11]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store float* [[TMP2]], float** [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: store [5 x [10 x double]]* [[TMP3]], [5 x [10 x double]]** [[TMP13]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK5-NEXT: store i64 [[TMP4]], i64* [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK5-NEXT: store i64 [[TMP5]], i64* [[TMP15]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK5-NEXT: store double* [[TMP6]], double** [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK5-NEXT: store %struct.TT* [[TMP7]], %struct.TT** [[TMP17]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8 -// CHECK5-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8 -// CHECK5-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4557,108 +4656,111 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK5-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK5-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8 -// CHECK5-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK5-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8 -// CHECK5-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8 -// CHECK5-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK5-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK5-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 0 +// CHECK5-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load [10 x float]*, [10 x float]** [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP11]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 6 +// CHECK5-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP13]], align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 7 +// CHECK5-NEXT: [[TMP16:%.*]] = load double*, double** [[TMP15]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 8 +// CHECK5-NEXT: [[TMP18:%.*]] = load %struct.TT*, %struct.TT** [[TMP17]], align 8 +// CHECK5-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i64 0, i64 0 // CHECK5-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[ARRAYDECAY]], i64 16) ] // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !36 -// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !36 -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !36 -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK5-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 4, !llvm.access.group !36 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK5-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !36 -// CHECK5-NEXT: [[CONV7:%.*]] = fpext float [[TMP17]] to double -// CHECK5-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 -// CHECK5-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK5-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !36 -// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK5-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !36 -// CHECK5-NEXT: [[CONV11:%.*]] = fpext float [[TMP18]] to double -// CHECK5-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK5-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK5-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !36 -// CHECK5-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 -// CHECK5-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i64 0, i64 2 -// CHECK5-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !36 -// CHECK5-NEXT: [[ADD16:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK5-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !36 -// CHECK5-NEXT: [[TMP20:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK5-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP20]] -// CHECK5-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i64 3 -// CHECK5-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !36 -// CHECK5-NEXT: [[ADD19:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK5-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !36 -// CHECK5-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !36 -// CHECK5-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK5-NEXT: store i64 [[ADD20]], i64* [[X]], align 8, !llvm.access.group !36 -// CHECK5-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !36 -// CHECK5-NEXT: [[CONV21:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK5-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK5-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK5-NEXT: store i8 [[CONV23]], i8* [[Y]], align 8, !llvm.access.group !36 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK5-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i64 0, i64 2 +// CHECK5-NEXT: [[TMP28:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[CONV:%.*]] = fpext float [[TMP28]] to double +// CHECK5-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK5-NEXT: [[CONV4:%.*]] = fptrunc double [[ADD3]] to float +// CHECK5-NEXT: store float [[CONV4]], float* [[ARRAYIDX]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 3 +// CHECK5-NEXT: [[TMP29:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[CONV6:%.*]] = fpext float [[TMP29]] to double +// CHECK5-NEXT: [[ADD7:%.*]] = fadd double [[CONV6]], 1.000000e+00 +// CHECK5-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float +// CHECK5-NEXT: store float [[CONV8]], float* [[ARRAYIDX5]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP10]], i64 0, i64 1 +// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX9]], i64 0, i64 2 +// CHECK5-NEXT: [[TMP30:%.*]] = load double, double* [[ARRAYIDX10]], align 8, !llvm.access.group !36 +// CHECK5-NEXT: [[ADD11:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK5-NEXT: store double [[ADD11]], double* [[ARRAYIDX10]], align 8, !llvm.access.group !36 +// CHECK5-NEXT: [[TMP31:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK5-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP16]], i64 [[TMP31]] +// CHECK5-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX12]], i64 3 +// CHECK5-NEXT: [[TMP32:%.*]] = load double, double* [[ARRAYIDX13]], align 8, !llvm.access.group !36 +// CHECK5-NEXT: [[ADD14:%.*]] = fadd double [[TMP32]], 1.000000e+00 +// CHECK5-NEXT: store double [[ADD14]], double* [[ARRAYIDX13]], align 8, !llvm.access.group !36 +// CHECK5-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP18]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP33:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !36 +// CHECK5-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP33]], 1 +// CHECK5-NEXT: store i64 [[ADD15]], i64* [[X]], align 8, !llvm.access.group !36 +// CHECK5-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP18]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP34:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !36 +// CHECK5-NEXT: [[CONV16:%.*]] = sext i8 [[TMP34]] to i32 +// CHECK5-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV16]], 1 +// CHECK5-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK5-NEXT: store i8 [[CONV18]], i8* [[Y]], align 8, !llvm.access.group !36 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK5-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK5-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK5-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]]) -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK5-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) +// CHECK5-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK5-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 10, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5025,8 +5127,7 @@ // CHECK5-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 // CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK5-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK5-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK5-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -5039,31 +5140,34 @@ // CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK5-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 // CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK5-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[CONV4]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV3]], align 1 -// CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 -// CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store %struct.S1* [[TMP0]], %struct.S1** [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK5-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store i64 [[TMP1]], i64* [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: store i64 [[TMP2]], i64* [[TMP8]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK5-NEXT: store i16* [[TMP3]], i16** [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK5-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV3]], align 1 +// CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK5-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK5-NEXT: store i8 [[FROMBOOL]], i8* [[CONV5]], align 1 -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]], i64 [[TMP7]]) +// CHECK5-NEXT: store i8 [[FROMBOOL]], i8* [[TMP10]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 +// CHECK5-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5073,120 +5177,126 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK5-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK5-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK5-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP10:%.*]] = load i16*, i16** [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 6 +// CHECK5-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP11]], align 8 +// CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 +// CHECK5-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK5-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 9 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 1 -// CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP9]] to i1 -// CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK5-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK5: omp_if.then: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 -// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !39 -// CHECK5-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP13]] to double -// CHECK5-NEXT: [[ADD6:%.*]] = fadd double [[CONV5]], 1.500000e+00 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: store double [[ADD6]], double* [[A]], align 8, !llvm.access.group !39 -// CHECK5-NEXT: [[A7:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP14:%.*]] = load double, double* [[A7]], align 8, !llvm.access.group !39 -// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK5-NEXT: store double [[INC]], double* [[A7]], align 8, !llvm.access.group !39 -// CHECK5-NEXT: [[CONV8:%.*]] = fptosi double [[INC]] to i16 -// CHECK5-NEXT: [[TMP15:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP15]] -// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 -// CHECK5-NEXT: store i16 [[CONV8]], i16* [[ARRAYIDX9]], align 2, !llvm.access.group !39 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[B]], align 4, !llvm.access.group !39 +// CHECK5-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP22]] to double +// CHECK5-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK5-NEXT: store double [[ADD3]], double* [[A]], align 8, !llvm.access.group !39 +// CHECK5-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP23:%.*]] = load double, double* [[A4]], align 8, !llvm.access.group !39 +// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// CHECK5-NEXT: store double [[INC]], double* [[A4]], align 8, !llvm.access.group !39 +// CHECK5-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 +// CHECK5-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i64 [[TMP24]] +// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// CHECK5-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2, !llvm.access.group !39 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK5-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_IF_END:%.*]] // CHECK5: omp_if.else: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND11:%.*]] -// CHECK5: omp.inner.for.cond11: -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK5-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY13:%.*]], label [[OMP_INNER_FOR_END27:%.*]] -// CHECK5: omp.inner.for.body13: -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL14:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK5-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] -// CHECK5-NEXT: store i32 [[ADD15]], i32* [[I]], align 4 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK5-NEXT: [[CONV16:%.*]] = sitofp i32 [[TMP20]] to double -// CHECK5-NEXT: [[ADD17:%.*]] = fadd double [[CONV16]], 1.500000e+00 -// CHECK5-NEXT: [[A18:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: store double [[ADD17]], double* [[A18]], align 8 -// CHECK5-NEXT: [[A19:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP21:%.*]] = load double, double* [[A19]], align 8 -// CHECK5-NEXT: [[INC20:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK5-NEXT: store double [[INC20]], double* [[A19]], align 8 -// CHECK5-NEXT: [[CONV21:%.*]] = fptosi double [[INC20]] to i16 -// CHECK5-NEXT: [[TMP22:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK5-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP22]] -// CHECK5-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX22]], i64 1 -// CHECK5-NEXT: store i16 [[CONV21]], i16* [[ARRAYIDX23]], align 2 -// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE24:%.*]] -// CHECK5: omp.body.continue24: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC25:%.*]] -// CHECK5: omp.inner.for.inc25: -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK5-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND11]], !llvm.loop [[LOOP42:![0-9]+]] -// CHECK5: omp.inner.for.end27: +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] +// CHECK5: omp.inner.for.cond8: +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK5-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END24:%.*]] +// CHECK5: omp.inner.for.body10: +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL11:%.*]] = mul nsw i32 [[TMP28]], 1 +// CHECK5-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK5-NEXT: store i32 [[ADD12]], i32* [[I]], align 4 +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, i32* [[B]], align 4 +// CHECK5-NEXT: [[CONV13:%.*]] = sitofp i32 [[TMP29]] to double +// CHECK5-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.500000e+00 +// CHECK5-NEXT: [[A15:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK5-NEXT: store double [[ADD14]], double* [[A15]], align 8 +// CHECK5-NEXT: [[A16:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP30:%.*]] = load double, double* [[A16]], align 8 +// CHECK5-NEXT: [[INC17:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK5-NEXT: store double [[INC17]], double* [[A16]], align 8 +// CHECK5-NEXT: [[CONV18:%.*]] = fptosi double [[INC17]] to i16 +// CHECK5-NEXT: [[TMP31:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK5-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i64 [[TMP31]] +// CHECK5-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX19]], i64 1 +// CHECK5-NEXT: store i16 [[CONV18]], i16* [[ARRAYIDX20]], align 2 +// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE21:%.*]] +// CHECK5: omp.body.continue21: +// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC22:%.*]] +// CHECK5: omp.inner.for.inc22: +// CHECK5-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK5-NEXT: store i32 [[ADD23]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP42:![0-9]+]] +// CHECK5: omp.inner.for.end24: // CHECK5-NEXT: br label [[OMP_IF_END]] // CHECK5: omp_if.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK5-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]]) +// CHECK5-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK5-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 10, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5202,10 +5312,7 @@ // CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK5-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK5-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 @@ -5216,159 +5323,163 @@ // CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK5-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK5-NEXT: [[CONV4:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 -// CHECK5-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK5-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 -// CHECK5-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* -// CHECK5-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], [10 x i32]* [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK5-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i16, i16* [[CONV2]], align 2 +// CHECK5-NEXT: store i16 [[TMP6]], i16* [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV3]], align 1 +// CHECK5-NEXT: store i8 [[TMP8]], i8* [[TMP7]], align 2 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP9]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[N:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK5-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[I8:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 -// CHECK5-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* -// CHECK5-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] -// CHECK5-NEXT: [[SUB6:%.*]] = sub i32 [[SUB]], 1 -// CHECK5-NEXT: [[ADD:%.*]] = add i32 [[SUB6]], 1 +// CHECK5-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: store i32 [[TMP4]], i32* [[N]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i16, i16* [[TMP5]], align 8 +// CHECK5-NEXT: store i16 [[TMP6]], i16* [[AA]], align 2 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP7]], align 2 +// CHECK5-NEXT: store i8 [[TMP8]], i8* [[AAA]], align 1 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[A]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[N]], align 4 +// CHECK5-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] +// CHECK5-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK5-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK5-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 -// CHECK5-NEXT: [[SUB7:%.*]] = sub i32 [[DIV]], 1 -// CHECK5-NEXT: store i32 [[SUB7]], i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[I]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK5-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: store i32 [[TMP15]], i32* [[I]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK5: omp.precond.then: // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK5-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK5-NEXT: [[CMP9:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] -// CHECK5-NEXT: br i1 [[CMP9]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK5-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] +// CHECK5-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !44 -// CHECK5-NEXT: [[ADD10:%.*]] = add i32 [[TMP17]], 1 -// CHECK5-NEXT: [[CMP11:%.*]] = icmp ult i32 [[TMP16]], [[ADD10]] -// CHECK5-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !44 +// CHECK5-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK5-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] +// CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !44 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 -// CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK5-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] -// CHECK5-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4, !llvm.access.group !44 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !44 -// CHECK5-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK5-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 4, !llvm.access.group !44 -// CHECK5-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2, !llvm.access.group !44 -// CHECK5-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 -// CHECK5-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 -// CHECK5-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK5-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2, !llvm.access.group !44 -// CHECK5-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1, !llvm.access.group !44 -// CHECK5-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 -// CHECK5-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 -// CHECK5-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK5-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1, !llvm.access.group !44 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 -// CHECK5-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK5-NEXT: store i32 [[ADD20]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !44 +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK5-NEXT: [[ADD9:%.*]] = add i32 [[TMP28]], [[MUL]] +// CHECK5-NEXT: store i32 [[ADD9]], i32* [[I5]], align 4, !llvm.access.group !44 +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !44 +// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK5-NEXT: store i32 [[ADD10]], i32* [[A]], align 4, !llvm.access.group !44 +// CHECK5-NEXT: [[TMP31:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !44 +// CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 +// CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK5-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 +// CHECK5-NEXT: store i16 [[CONV12]], i16* [[AA]], align 2, !llvm.access.group !44 +// CHECK5-NEXT: [[TMP32:%.*]] = load i8, i8* [[AAA]], align 1, !llvm.access.group !44 +// CHECK5-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 +// CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 +// CHECK5-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 +// CHECK5-NEXT: store i8 [[CONV15]], i8* [[AAA]], align 1, !llvm.access.group !44 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP10]], i64 0, i64 2 +// CHECK5-NEXT: [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 +// CHECK5-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK5-NEXT: store i32 [[ADD16]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 -// CHECK5-NEXT: [[ADD21:%.*]] = add i32 [[TMP24]], 1 -// CHECK5-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK5-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK5-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 +// CHECK5-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK5-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK5-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK5-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: -// CHECK5-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK5-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[SUB22:%.*]] = sub i32 [[TMP30]], [[TMP31]] -// CHECK5-NEXT: [[SUB23:%.*]] = sub i32 [[SUB22]], 1 -// CHECK5-NEXT: [[ADD24:%.*]] = add i32 [[SUB23]], 1 -// CHECK5-NEXT: [[DIV25:%.*]] = udiv i32 [[ADD24]], 1 -// CHECK5-NEXT: [[MUL26:%.*]] = mul i32 [[DIV25]], 1 -// CHECK5-NEXT: [[ADD27:%.*]] = add i32 [[TMP29]], [[MUL26]] -// CHECK5-NEXT: store i32 [[ADD27]], i32* [[I8]], align 4 +// CHECK5-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[SUB18:%.*]] = sub i32 [[TMP40]], [[TMP41]] +// CHECK5-NEXT: [[SUB19:%.*]] = sub i32 [[SUB18]], 1 +// CHECK5-NEXT: [[ADD20:%.*]] = add i32 [[SUB19]], 1 +// CHECK5-NEXT: [[DIV21:%.*]] = udiv i32 [[ADD20]], 1 +// CHECK5-NEXT: [[MUL22:%.*]] = mul i32 [[DIV21]], 1 +// CHECK5-NEXT: [[ADD23:%.*]] = add i32 [[TMP39]], [[MUL22]] +// CHECK5-NEXT: store i32 [[ADD23]], i32* [[I5]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: br label [[OMP_PRECOND_END]] @@ -5382,34 +5493,33 @@ // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK5-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK5-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK5-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK5-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK5-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP5]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5419,70 +5529,74 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK5-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK5-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK5-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !47 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !47 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !47 -// CHECK5-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !47 -// CHECK5-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 -// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 -// CHECK5-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK5-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !47 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !47 -// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK5-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !47 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !47 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK5-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !47 +// CHECK5-NEXT: [[TMP16:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !47 +// CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK5-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK5-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2, !llvm.access.group !47 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 2 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !47 +// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK5-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !47 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK5-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK5-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 10, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5517,7 +5631,7 @@ // CHECK7-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4 // CHECK7-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4 // CHECK7-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4 -// CHECK7-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK7-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[AA_CASTED4:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOFFLOAD_BASEPTRS6:%.*]] = alloca [1 x i8*], align 4 @@ -5589,13 +5703,13 @@ // CHECK7-NEXT: store i8* null, i8** [[TMP27]], align 4 // CHECK7-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK7-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 0 // CHECK7-NEXT: [[TMP31:%.*]] = load i16, i16* [[AA]], align 2 // CHECK7-NEXT: store i16 [[TMP31]], i16* [[TMP30]], align 4 -// CHECK7-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 1 // CHECK7-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK7-NEXT: store i32 [[TMP33]], i32* [[TMP32]], align 4 -// CHECK7-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 2 // CHECK7-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK7-NEXT: store i32 [[TMP35]], i32* [[TMP34]], align 4 // CHECK7-NEXT: [[TMP36:%.*]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 72, i32 12, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*), i64 -1) @@ -5603,10 +5717,10 @@ // CHECK7-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 0 // CHECK7-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP38]], i32 0, i32 0 // CHECK7-NEXT: [[TMP40:%.*]] = load i8*, i8** [[TMP39]], align 4 -// CHECK7-NEXT: [[TMP41:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* +// CHECK7-NEXT: [[TMP41:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8* // CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP40]], i8* align 4 [[TMP41]], i32 12, i1 false) // CHECK7-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP37]], i32 0, i32 1 -// CHECK7-NEXT: [[TMP43:%.*]] = bitcast i8* [[TMP40]] to %struct.anon* +// CHECK7-NEXT: [[TMP43:%.*]] = bitcast i8* [[TMP40]] to %struct.anon.0* // CHECK7-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP42]], i32 0, i32 0 // CHECK7-NEXT: [[TMP45:%.*]] = bitcast [3 x i64]* [[TMP44]] to i8* // CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP45]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false) @@ -5808,7 +5922,7 @@ // CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK7-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) // CHECK7-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK7-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -5817,20 +5931,20 @@ // CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK7-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK7-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK7-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK7-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined. to void (i32*, i32*, ...)*), i32 [[TMP4]]) +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK7-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 2 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK7-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5840,54 +5954,57 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK7-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK7-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK7-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK7-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK7-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK7-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 10, i32* [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5932,7 +6049,7 @@ // CHECK7-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 4 // CHECK7-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 4 // CHECK7-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 4 -// CHECK7-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 4 // CHECK7-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i16*, align 4 // CHECK7-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca [3 x i8*]*, align 4 // CHECK7-NEXT: [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca [3 x i8*]*, align 4 @@ -5950,7 +6067,7 @@ // CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK7-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK7-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK7-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK7-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -5963,8 +6080,8 @@ // CHECK7-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !27 // CHECK7-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !27 // CHECK7-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !27 -// CHECK7-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !27 -// CHECK7-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !27 +// CHECK7-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 4, !noalias !27 +// CHECK7-NEXT: [[TMP12:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 4, !noalias !27 // CHECK7-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !27 // CHECK7-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !27 // CHECK7-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @@ -5976,8 +6093,8 @@ // CHECK7-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i32 0, i32 0 // CHECK7-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP18]], i32 0, i32 0 // CHECK7-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 -// CHECK7-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP12]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP12]], i32 0, i32 2 // CHECK7-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP23]], align 4 // CHECK7-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10) #[[ATTR4]] // CHECK7-NEXT: [[TMP26:%.*]] = call i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l97.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* [[TMP22]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 [[TMP25]], i32 1, i32 0, i8* null, i32 0, i8* null) #[[ATTR4]] @@ -6004,21 +6121,22 @@ // CHECK7-SAME: (i32 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK7-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32 [[TMP1]]) +// CHECK7-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK7-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6028,58 +6146,62 @@ // CHECK7-NEXT: [[A1:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[A1]], align 4, !nontemporal !28 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[A1]], align 4, !nontemporal !28 -// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[A1]], align 4, !nontemporal !28 +// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK7-NEXT: store i32 [[ADD3]], i32* [[A1]], align 4, !nontemporal !28 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK7-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK7-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK7-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: -// CHECK7-NEXT: store i32 10, i32* [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 10, i32* [[A]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK7: .omp.final.done: // CHECK7-NEXT: ret void @@ -6089,23 +6211,23 @@ // CHECK7-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR2]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 2 // CHECK7-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK7-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK7-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK7-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK7-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP1]]) +// CHECK7-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK7-NEXT: store i16 [[TMP1]], i16* [[TMP0]], align 2 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK7-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6115,59 +6237,62 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK7-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK7-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK7-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !31 -// CHECK7-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !31 -// CHECK7-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 -// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 -// CHECK7-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK7-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !31 +// CHECK7-NEXT: [[TMP11:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !31 +// CHECK7-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK7-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 +// CHECK7-NEXT: store i16 [[CONV3]], i16* [[AA]], align 2, !llvm.access.group !31 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK7-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK7-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK7-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK7-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 10, i32* [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6180,29 +6305,28 @@ // CHECK7-NEXT: entry: // CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK7-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK7-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK7-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK7-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK7-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP1]], i32 [[TMP3]]) +// CHECK7-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK7-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK7-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6212,63 +6336,68 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK7-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK7-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK7-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !34 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !34 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !34 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !34 -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK7-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !34 -// CHECK7-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !34 -// CHECK7-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 -// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 -// CHECK7-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK7-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !34 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !34 +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK7-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !34 +// CHECK7-NEXT: [[TMP14:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !34 +// CHECK7-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK7-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK7-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2, !llvm.access.group !34 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK7-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK7-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK7-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK7-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 10, i32* [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6288,7 +6417,7 @@ // CHECK7-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 // CHECK7-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 -// CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK7-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK7-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 // CHECK7-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -6306,27 +6435,36 @@ // CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 // CHECK7-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 // CHECK7-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], i32* [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 9, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [10 x float]*, i32, float*, [5 x [10 x double]]*, i32, i32, double*, %struct.TT*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP9]], [10 x float]* [[TMP0]], i32 [[TMP1]], float* [[TMP2]], [5 x [10 x double]]* [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], double* [[TMP6]], %struct.TT* [[TMP7]]) +// CHECK7-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store [10 x float]* [[TMP0]], [10 x float]** [[TMP10]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store i32 [[TMP1]], i32* [[TMP11]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: store float* [[TMP2]], float** [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK7-NEXT: store [5 x [10 x double]]* [[TMP3]], [5 x [10 x double]]** [[TMP13]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK7-NEXT: store i32 [[TMP4]], i32* [[TMP14]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK7-NEXT: store i32 [[TMP5]], i32* [[TMP15]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK7-NEXT: store double* [[TMP6]], double** [[TMP16]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK7-NEXT: store %struct.TT* [[TMP7]], %struct.TT** [[TMP17]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4 -// CHECK7-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4 -// CHECK7-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4 -// CHECK7-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 -// CHECK7-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK7-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6336,107 +6474,111 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK7-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4 -// CHECK7-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK7-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4 -// CHECK7-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4 -// CHECK7-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 -// CHECK7-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load [10 x float]*, [10 x float]** [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 6 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 7 +// CHECK7-NEXT: [[TMP16:%.*]] = load double*, double** [[TMP15]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 8 +// CHECK7-NEXT: [[TMP18:%.*]] = load %struct.TT*, %struct.TT** [[TMP17]], align 4 +// CHECK7-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i32 0, i32 0 // CHECK7-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[ARRAYDECAY]], i32 16) ] // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 -// CHECK7-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK7-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK7-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !37 -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !37 -// CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK7-NEXT: store i32 [[ADD6]], i32* [[A_ADDR]], align 4, !llvm.access.group !37 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !37 -// CHECK7-NEXT: [[CONV:%.*]] = fpext float [[TMP17]] to double -// CHECK7-NEXT: [[ADD7:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !37 +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK7-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !37 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP28:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !37 +// CHECK7-NEXT: [[CONV:%.*]] = fpext float [[TMP28]] to double +// CHECK7-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK7-NEXT: [[CONV4:%.*]] = fptrunc double [[ADD3]] to float +// CHECK7-NEXT: store float [[CONV4]], float* [[ARRAYIDX]], align 4, !llvm.access.group !37 +// CHECK7-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP8]], i32 3 +// CHECK7-NEXT: [[TMP29:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !37 +// CHECK7-NEXT: [[CONV6:%.*]] = fpext float [[TMP29]] to double +// CHECK7-NEXT: [[ADD7:%.*]] = fadd double [[CONV6]], 1.000000e+00 // CHECK7-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float -// CHECK7-NEXT: store float [[CONV8]], float* [[ARRAYIDX]], align 4, !llvm.access.group !37 -// CHECK7-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK7-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX9]], align 4, !llvm.access.group !37 -// CHECK7-NEXT: [[CONV10:%.*]] = fpext float [[TMP18]] to double -// CHECK7-NEXT: [[ADD11:%.*]] = fadd double [[CONV10]], 1.000000e+00 -// CHECK7-NEXT: [[CONV12:%.*]] = fptrunc double [[ADD11]] to float -// CHECK7-NEXT: store float [[CONV12]], float* [[ARRAYIDX9]], align 4, !llvm.access.group !37 -// CHECK7-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 -// CHECK7-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !37 -// CHECK7-NEXT: [[ADD15:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK7-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8, !llvm.access.group !37 -// CHECK7-NEXT: [[TMP20:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK7-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP20]] -// CHECK7-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX16]], i32 3 -// CHECK7-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !37 -// CHECK7-NEXT: [[ADD18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK7-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !37 -// CHECK7-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !37 -// CHECK7-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK7-NEXT: store i64 [[ADD19]], i64* [[X]], align 4, !llvm.access.group !37 -// CHECK7-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK7-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !37 -// CHECK7-NEXT: [[CONV20:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK7-NEXT: [[ADD21:%.*]] = add nsw i32 [[CONV20]], 1 -// CHECK7-NEXT: [[CONV22:%.*]] = trunc i32 [[ADD21]] to i8 -// CHECK7-NEXT: store i8 [[CONV22]], i8* [[Y]], align 4, !llvm.access.group !37 +// CHECK7-NEXT: store float [[CONV8]], float* [[ARRAYIDX5]], align 4, !llvm.access.group !37 +// CHECK7-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP10]], i32 0, i32 1 +// CHECK7-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX9]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP30:%.*]] = load double, double* [[ARRAYIDX10]], align 8, !llvm.access.group !37 +// CHECK7-NEXT: [[ADD11:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK7-NEXT: store double [[ADD11]], double* [[ARRAYIDX10]], align 8, !llvm.access.group !37 +// CHECK7-NEXT: [[TMP31:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK7-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP16]], i32 [[TMP31]] +// CHECK7-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX12]], i32 3 +// CHECK7-NEXT: [[TMP32:%.*]] = load double, double* [[ARRAYIDX13]], align 8, !llvm.access.group !37 +// CHECK7-NEXT: [[ADD14:%.*]] = fadd double [[TMP32]], 1.000000e+00 +// CHECK7-NEXT: store double [[ADD14]], double* [[ARRAYIDX13]], align 8, !llvm.access.group !37 +// CHECK7-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP18]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP33:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !37 +// CHECK7-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP33]], 1 +// CHECK7-NEXT: store i64 [[ADD15]], i64* [[X]], align 4, !llvm.access.group !37 +// CHECK7-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP18]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP34:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !37 +// CHECK7-NEXT: [[CONV16:%.*]] = sext i8 [[TMP34]] to i32 +// CHECK7-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV16]], 1 +// CHECK7-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK7-NEXT: store i8 [[CONV18]], i8* [[Y]], align 4, !llvm.access.group !37 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK7-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK7-NEXT: store i32 [[ADD23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK7-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK7-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK7-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]]) -// CHECK7-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK7-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) +// CHECK7-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK7-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 10, i32* [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6799,8 +6941,7 @@ // CHECK7-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 // CHECK7-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK7-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK7-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK7-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -6812,30 +6953,34 @@ // CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 // CHECK7-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 // CHECK7-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK7-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 -// CHECK7-NEXT: [[CONV3:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store %struct.S1* [[TMP0]], %struct.S1** [[TMP4]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store i32 [[TMP1]], i32* [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[TMP8]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK7-NEXT: store i16* [[TMP3]], i16** [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK7-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK7-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK7-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]], i32 [[TMP7]]) +// CHECK7-NEXT: store i8 [[FROMBOOL]], i8* [[TMP10]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 -// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 -// CHECK7-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 +// CHECK7-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6845,119 +6990,126 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK7-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 -// CHECK7-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK7-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i16*, i16** [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP11]], align 4 +// CHECK7-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 +// CHECK7-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK7-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 9 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK7-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP9]] to i1 -// CHECK7-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK7-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK7-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK7: omp_if.then: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !40 -// CHECK7-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK7-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !40 +// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK7-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !40 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[B_ADDR]], align 4, !llvm.access.group !40 -// CHECK7-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP13]] to double -// CHECK7-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: store double [[ADD5]], double* [[A]], align 4, !llvm.access.group !40 -// CHECK7-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP14:%.*]] = load double, double* [[A6]], align 4, !llvm.access.group !40 -// CHECK7-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK7-NEXT: store double [[INC]], double* [[A6]], align 4, !llvm.access.group !40 -// CHECK7-NEXT: [[CONV7:%.*]] = fptosi double [[INC]] to i16 -// CHECK7-NEXT: [[TMP15:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP15]] -// CHECK7-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 -// CHECK7-NEXT: store i16 [[CONV7]], i16* [[ARRAYIDX8]], align 2, !llvm.access.group !40 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, i32* [[B]], align 4, !llvm.access.group !40 +// CHECK7-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP22]] to double +// CHECK7-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK7-NEXT: store double [[ADD3]], double* [[A]], align 4, !llvm.access.group !40 +// CHECK7-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP23:%.*]] = load double, double* [[A4]], align 4, !llvm.access.group !40 +// CHECK7-NEXT: [[INC:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// CHECK7-NEXT: store double [[INC]], double* [[A4]], align 4, !llvm.access.group !40 +// CHECK7-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 +// CHECK7-NEXT: [[TMP24:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i32 [[TMP24]] +// CHECK7-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// CHECK7-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2, !llvm.access.group !40 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK7-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK7-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK7-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK7-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_IF_END:%.*]] // CHECK7: omp_if.else: -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] -// CHECK7: omp.inner.for.cond10: -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK7-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END26:%.*]] -// CHECK7: omp.inner.for.body12: -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK7-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] -// CHECK7-NEXT: store i32 [[ADD14]], i32* [[I]], align 4 -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK7-NEXT: [[CONV15:%.*]] = sitofp i32 [[TMP20]] to double -// CHECK7-NEXT: [[ADD16:%.*]] = fadd double [[CONV15]], 1.500000e+00 -// CHECK7-NEXT: [[A17:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: store double [[ADD16]], double* [[A17]], align 4 -// CHECK7-NEXT: [[A18:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP21:%.*]] = load double, double* [[A18]], align 4 -// CHECK7-NEXT: [[INC19:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK7-NEXT: store double [[INC19]], double* [[A18]], align 4 -// CHECK7-NEXT: [[CONV20:%.*]] = fptosi double [[INC19]] to i16 -// CHECK7-NEXT: [[TMP22:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK7-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP22]] -// CHECK7-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX21]], i32 1 -// CHECK7-NEXT: store i16 [[CONV20]], i16* [[ARRAYIDX22]], align 2 -// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE23:%.*]] -// CHECK7: omp.body.continue23: -// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC24:%.*]] -// CHECK7: omp.inner.for.inc24: -// CHECK7-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK7-NEXT: store i32 [[ADD25]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP43:![0-9]+]] -// CHECK7: omp.inner.for.end26: +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] +// CHECK7: omp.inner.for.cond8: +// CHECK7-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK7-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END24:%.*]] +// CHECK7: omp.inner.for.body10: +// CHECK7-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL11:%.*]] = mul nsw i32 [[TMP28]], 1 +// CHECK7-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK7-NEXT: store i32 [[ADD12]], i32* [[I]], align 4 +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, i32* [[B]], align 4 +// CHECK7-NEXT: [[CONV13:%.*]] = sitofp i32 [[TMP29]] to double +// CHECK7-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.500000e+00 +// CHECK7-NEXT: [[A15:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK7-NEXT: store double [[ADD14]], double* [[A15]], align 4 +// CHECK7-NEXT: [[A16:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP30:%.*]] = load double, double* [[A16]], align 4 +// CHECK7-NEXT: [[INC17:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK7-NEXT: store double [[INC17]], double* [[A16]], align 4 +// CHECK7-NEXT: [[CONV18:%.*]] = fptosi double [[INC17]] to i16 +// CHECK7-NEXT: [[TMP31:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK7-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i32 [[TMP31]] +// CHECK7-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX19]], i32 1 +// CHECK7-NEXT: store i16 [[CONV18]], i16* [[ARRAYIDX20]], align 2 +// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE21:%.*]] +// CHECK7: omp.body.continue21: +// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC22:%.*]] +// CHECK7: omp.inner.for.inc22: +// CHECK7-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK7-NEXT: store i32 [[ADD23]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP43:![0-9]+]] +// CHECK7: omp.inner.for.end24: // CHECK7-NEXT: br label [[OMP_IF_END]] // CHECK7: omp_if.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK7-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]]) +// CHECK7-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK7-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 10, i32* [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6973,10 +7125,7 @@ // CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK7-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK7-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 @@ -6985,155 +7134,163 @@ // CHECK7-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK7-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* // CHECK7-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK7-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK7-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 -// CHECK7-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* -// CHECK7-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]], [10 x i32]* [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK7-NEXT: store i16 [[TMP6]], i16* [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV1]], align 1 +// CHECK7-NEXT: store i8 [[TMP8]], i8* [[TMP7]], align 2 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK7-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP9]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[N:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 4 +// CHECK7-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK7-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 -// CHECK7-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK7-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK7-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* -// CHECK7-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK7-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] -// CHECK7-NEXT: [[SUB4:%.*]] = sub i32 [[SUB]], 1 -// CHECK7-NEXT: [[ADD:%.*]] = add i32 [[SUB4]], 1 +// CHECK7-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], i32* [[N]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load i16, i16* [[TMP5]], align 4 +// CHECK7-NEXT: store i16 [[TMP6]], i16* [[AA]], align 2 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP7]], align 2 +// CHECK7-NEXT: store i8 [[TMP8]], i8* [[AAA]], align 1 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[A]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[N]], align 4 +// CHECK7-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK7-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] +// CHECK7-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK7-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK7-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 -// CHECK7-NEXT: [[SUB5:%.*]] = sub i32 [[DIV]], 1 -// CHECK7-NEXT: store i32 [[SUB5]], i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], i32* [[I]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK7-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK7-NEXT: store i32 [[TMP15]], i32* [[I]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK7: omp.precond.then: // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK7-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK7-NEXT: [[CMP7:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] -// CHECK7-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK7-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK7-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] +// CHECK7-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK7-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 -// CHECK7-NEXT: [[ADD8:%.*]] = add i32 [[TMP17]], 1 -// CHECK7-NEXT: [[CMP9:%.*]] = icmp ult i32 [[TMP16]], [[ADD8]] -// CHECK7-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK7-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 +// CHECK7-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK7-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] +// CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !45 -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK7-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK7-NEXT: [[ADD10:%.*]] = add i32 [[TMP18]], [[MUL]] -// CHECK7-NEXT: store i32 [[ADD10]], i32* [[I6]], align 4, !llvm.access.group !45 -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !45 -// CHECK7-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK7-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4, !llvm.access.group !45 -// CHECK7-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !45 -// CHECK7-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 -// CHECK7-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 -// CHECK7-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK7-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !45 -// CHECK7-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1, !llvm.access.group !45 -// CHECK7-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 -// CHECK7-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 -// CHECK7-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK7-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1, !llvm.access.group !45 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 -// CHECK7-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK7-NEXT: store i32 [[ADD18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 +// CHECK7-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !45 +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK7-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK7-NEXT: [[ADD9:%.*]] = add i32 [[TMP28]], [[MUL]] +// CHECK7-NEXT: store i32 [[ADD9]], i32* [[I5]], align 4, !llvm.access.group !45 +// CHECK7-NEXT: [[TMP30:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !45 +// CHECK7-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK7-NEXT: store i32 [[ADD10]], i32* [[A]], align 4, !llvm.access.group !45 +// CHECK7-NEXT: [[TMP31:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !45 +// CHECK7-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 +// CHECK7-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK7-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 +// CHECK7-NEXT: store i16 [[CONV12]], i16* [[AA]], align 2, !llvm.access.group !45 +// CHECK7-NEXT: [[TMP32:%.*]] = load i8, i8* [[AAA]], align 1, !llvm.access.group !45 +// CHECK7-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 +// CHECK7-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 +// CHECK7-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 +// CHECK7-NEXT: store i8 [[CONV15]], i8* [[AAA]], align 1, !llvm.access.group !45 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP10]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 +// CHECK7-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK7-NEXT: store i32 [[ADD16]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK7-NEXT: [[ADD19:%.*]] = add i32 [[TMP24]], 1 -// CHECK7-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK7-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK7-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 +// CHECK7-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK7-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK7-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK7-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK7-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: -// CHECK7-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK7-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK7-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK7-NEXT: [[SUB20:%.*]] = sub i32 [[TMP30]], [[TMP31]] -// CHECK7-NEXT: [[SUB21:%.*]] = sub i32 [[SUB20]], 1 -// CHECK7-NEXT: [[ADD22:%.*]] = add i32 [[SUB21]], 1 -// CHECK7-NEXT: [[DIV23:%.*]] = udiv i32 [[ADD22]], 1 -// CHECK7-NEXT: [[MUL24:%.*]] = mul i32 [[DIV23]], 1 -// CHECK7-NEXT: [[ADD25:%.*]] = add i32 [[TMP29]], [[MUL24]] -// CHECK7-NEXT: store i32 [[ADD25]], i32* [[I6]], align 4 +// CHECK7-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK7-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK7-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK7-NEXT: [[SUB18:%.*]] = sub i32 [[TMP40]], [[TMP41]] +// CHECK7-NEXT: [[SUB19:%.*]] = sub i32 [[SUB18]], 1 +// CHECK7-NEXT: [[ADD20:%.*]] = add i32 [[SUB19]], 1 +// CHECK7-NEXT: [[DIV21:%.*]] = udiv i32 [[ADD20]], 1 +// CHECK7-NEXT: [[MUL22:%.*]] = mul i32 [[DIV21]], 1 +// CHECK7-NEXT: [[ADD23:%.*]] = add i32 [[TMP39]], [[MUL22]] +// CHECK7-NEXT: store i32 [[ADD23]], i32* [[I5]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK7: .omp.final.done: // CHECK7-NEXT: br label [[OMP_PRECOND_END]] @@ -7147,32 +7304,32 @@ // CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK7-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK7-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK7-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK7-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK7-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK7-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK7-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP5]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 4 +// CHECK7-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7182,69 +7339,74 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK7-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK7-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK7-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK7-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !48 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !48 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !48 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !48 -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK7-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !48 -// CHECK7-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !48 -// CHECK7-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 -// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 -// CHECK7-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK7-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !48 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !48 -// CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK7-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !48 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !48 +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK7-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !48 +// CHECK7-NEXT: [[TMP16:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !48 +// CHECK7-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK7-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK7-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2, !llvm.access.group !48 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !48 +// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK7-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !48 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 -// CHECK7-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK7-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK7-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK7-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK7-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 10, i32* [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9397,7 +9559,7 @@ // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -9408,20 +9570,20 @@ // CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK17-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK17-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP4]]) +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK17-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 2 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9431,54 +9593,57 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK17-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK17-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK17-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK17-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK17-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9490,23 +9655,23 @@ // CHECK17-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK17-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK17-NEXT: store i16 [[TMP1]], i16* [[TMP0]], align 2 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9516,59 +9681,62 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK17-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK17-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !18 -// CHECK17-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 -// CHECK17-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK17-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP11:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !18 +// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 +// CHECK17-NEXT: store i16 [[CONV3]], i16* [[AA]], align 2, !llvm.access.group !18 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK17-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK17-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK17-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9581,31 +9749,29 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK17-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP3]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK17-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK17-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9615,64 +9781,68 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK17-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK17-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] -// CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !21 -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK17-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !21 -// CHECK17-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !21 -// CHECK17-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 -// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 -// CHECK17-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK17-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !21 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK17-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: [[TMP14:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !21 +// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK17-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK17-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2, !llvm.access.group !21 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK17-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK17-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9692,7 +9862,7 @@ // CHECK17-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 // CHECK17-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK17-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -9711,28 +9881,36 @@ // CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK17-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK17-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 9, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, [10 x float]*, i64, float*, [5 x [10 x double]]*, i64, i64, double*, %struct.TT*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], [10 x float]* [[TMP0]], i64 [[TMP1]], float* [[TMP2]], [5 x [10 x double]]* [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], double* [[TMP6]], %struct.TT* [[TMP7]]) +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store [10 x float]* [[TMP0]], [10 x float]** [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i64 [[TMP1]], i64* [[TMP11]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store float* [[TMP2]], float** [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store [5 x [10 x double]]* [[TMP3]], [5 x [10 x double]]** [[TMP13]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK17-NEXT: store i64 [[TMP5]], i64* [[TMP15]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK17-NEXT: store double* [[TMP6]], double** [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK17-NEXT: store %struct.TT* [[TMP7]], %struct.TT** [[TMP17]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8 -// CHECK17-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 -// CHECK17-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9742,108 +9920,111 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK17-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8 -// CHECK17-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8 -// CHECK17-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8 -// CHECK17-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK17-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 0 +// CHECK17-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load [10 x float]*, [10 x float]** [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP11]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP13]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 7 +// CHECK17-NEXT: [[TMP16:%.*]] = load double*, double** [[TMP15]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 8 +// CHECK17-NEXT: [[TMP18:%.*]] = load %struct.TT*, %struct.TT** [[TMP17]], align 8 +// CHECK17-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i64 0, i64 0 // CHECK17-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[ARRAYDECAY]], i64 16) ] // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !24 -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK17-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 4, !llvm.access.group !24 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !24 -// CHECK17-NEXT: [[CONV7:%.*]] = fpext float [[TMP17]] to double -// CHECK17-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 -// CHECK17-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK17-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !24 -// CHECK17-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK17-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !24 -// CHECK17-NEXT: [[CONV11:%.*]] = fpext float [[TMP18]] to double -// CHECK17-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK17-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK17-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !24 -// CHECK17-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 -// CHECK17-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !24 -// CHECK17-NEXT: [[ADD16:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK17-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !24 -// CHECK17-NEXT: [[TMP20:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK17-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP20]] -// CHECK17-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i64 3 -// CHECK17-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !24 -// CHECK17-NEXT: [[ADD19:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK17-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !24 -// CHECK17-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !24 -// CHECK17-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK17-NEXT: store i64 [[ADD20]], i64* [[X]], align 8, !llvm.access.group !24 -// CHECK17-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !24 -// CHECK17-NEXT: [[CONV21:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK17-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK17-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK17-NEXT: store i8 [[CONV23]], i8* [[Y]], align 8, !llvm.access.group !24 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK17-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i64 0, i64 2 +// CHECK17-NEXT: [[TMP28:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: [[CONV:%.*]] = fpext float [[TMP28]] to double +// CHECK17-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK17-NEXT: [[CONV4:%.*]] = fptrunc double [[ADD3]] to float +// CHECK17-NEXT: store float [[CONV4]], float* [[ARRAYIDX]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 3 +// CHECK17-NEXT: [[TMP29:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: [[CONV6:%.*]] = fpext float [[TMP29]] to double +// CHECK17-NEXT: [[ADD7:%.*]] = fadd double [[CONV6]], 1.000000e+00 +// CHECK17-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float +// CHECK17-NEXT: store float [[CONV8]], float* [[ARRAYIDX5]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP10]], i64 0, i64 1 +// CHECK17-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX9]], i64 0, i64 2 +// CHECK17-NEXT: [[TMP30:%.*]] = load double, double* [[ARRAYIDX10]], align 8, !llvm.access.group !24 +// CHECK17-NEXT: [[ADD11:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK17-NEXT: store double [[ADD11]], double* [[ARRAYIDX10]], align 8, !llvm.access.group !24 +// CHECK17-NEXT: [[TMP31:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK17-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP16]], i64 [[TMP31]] +// CHECK17-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX12]], i64 3 +// CHECK17-NEXT: [[TMP32:%.*]] = load double, double* [[ARRAYIDX13]], align 8, !llvm.access.group !24 +// CHECK17-NEXT: [[ADD14:%.*]] = fadd double [[TMP32]], 1.000000e+00 +// CHECK17-NEXT: store double [[ADD14]], double* [[ARRAYIDX13]], align 8, !llvm.access.group !24 +// CHECK17-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP18]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP33:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !24 +// CHECK17-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP33]], 1 +// CHECK17-NEXT: store i64 [[ADD15]], i64* [[X]], align 8, !llvm.access.group !24 +// CHECK17-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP18]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP34:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !24 +// CHECK17-NEXT: [[CONV16:%.*]] = sext i8 [[TMP34]] to i32 +// CHECK17-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV16]], 1 +// CHECK17-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK17-NEXT: store i8 [[CONV18]], i8* [[Y]], align 8, !llvm.access.group !24 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK17-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK17-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK17-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]]) -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK17-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) +// CHECK17-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK17-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9859,10 +10040,7 @@ // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 @@ -9873,159 +10051,163 @@ // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 -// CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK17-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 -// CHECK17-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* -// CHECK17-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, [10 x i32]*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], [10 x i32]* [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i16, i16* [[CONV2]], align 2 +// CHECK17-NEXT: store i16 [[TMP6]], i16* [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV3]], align 1 +// CHECK17-NEXT: store i8 [[TMP8]], i8* [[TMP7]], align 2 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP9]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[N:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK17-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[I8:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] -// CHECK17-NEXT: [[SUB6:%.*]] = sub i32 [[SUB]], 1 -// CHECK17-NEXT: [[ADD:%.*]] = add i32 [[SUB6]], 1 +// CHECK17-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], i32* [[N]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i16, i16* [[TMP5]], align 8 +// CHECK17-NEXT: store i16 [[TMP6]], i16* [[AA]], align 2 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP7]], align 2 +// CHECK17-NEXT: store i8 [[TMP8]], i8* [[AAA]], align 1 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[A]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK17-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK17-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 -// CHECK17-NEXT: [[SUB7:%.*]] = sub i32 [[DIV]], 1 -// CHECK17-NEXT: store i32 [[SUB7]], i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK17-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP15]], i32* [[I]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK17-NEXT: [[CMP9:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] -// CHECK17-NEXT: br i1 [[CMP9]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] +// CHECK17-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 -// CHECK17-NEXT: [[ADD10:%.*]] = add i32 [[TMP17]], 1 -// CHECK17-NEXT: [[CMP11:%.*]] = icmp ult i32 [[TMP16]], [[ADD10]] -// CHECK17-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK17-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK17-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] +// CHECK17-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !27 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK17-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK17-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] -// CHECK17-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4, !llvm.access.group !27 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !27 -// CHECK17-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK17-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 4, !llvm.access.group !27 -// CHECK17-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2, !llvm.access.group !27 -// CHECK17-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 -// CHECK17-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 -// CHECK17-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK17-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2, !llvm.access.group !27 -// CHECK17-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1, !llvm.access.group !27 -// CHECK17-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 -// CHECK17-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 -// CHECK17-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK17-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1, !llvm.access.group !27 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 -// CHECK17-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK17-NEXT: store i32 [[ADD20]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !27 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK17-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK17-NEXT: [[ADD9:%.*]] = add i32 [[TMP28]], [[MUL]] +// CHECK17-NEXT: store i32 [[ADD9]], i32* [[I5]], align 4, !llvm.access.group !27 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !27 +// CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK17-NEXT: store i32 [[ADD10]], i32* [[A]], align 4, !llvm.access.group !27 +// CHECK17-NEXT: [[TMP31:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !27 +// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 +// CHECK17-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK17-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 +// CHECK17-NEXT: store i16 [[CONV12]], i16* [[AA]], align 2, !llvm.access.group !27 +// CHECK17-NEXT: [[TMP32:%.*]] = load i8, i8* [[AAA]], align 1, !llvm.access.group !27 +// CHECK17-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 +// CHECK17-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 +// CHECK17-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 +// CHECK17-NEXT: store i8 [[CONV15]], i8* [[AAA]], align 1, !llvm.access.group !27 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP10]], i64 0, i64 2 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 +// CHECK17-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK17-NEXT: store i32 [[ADD16]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK17-NEXT: [[ADD21:%.*]] = add i32 [[TMP24]], 1 -// CHECK17-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK17-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK17-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 +// CHECK17-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK17-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK17-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK17-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK17-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB22:%.*]] = sub i32 [[TMP30]], [[TMP31]] -// CHECK17-NEXT: [[SUB23:%.*]] = sub i32 [[SUB22]], 1 -// CHECK17-NEXT: [[ADD24:%.*]] = add i32 [[SUB23]], 1 -// CHECK17-NEXT: [[DIV25:%.*]] = udiv i32 [[ADD24]], 1 -// CHECK17-NEXT: [[MUL26:%.*]] = mul i32 [[DIV25]], 1 -// CHECK17-NEXT: [[ADD27:%.*]] = add i32 [[TMP29]], [[MUL26]] -// CHECK17-NEXT: store i32 [[ADD27]], i32* [[I8]], align 4 +// CHECK17-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB18:%.*]] = sub i32 [[TMP40]], [[TMP41]] +// CHECK17-NEXT: [[SUB19:%.*]] = sub i32 [[SUB18]], 1 +// CHECK17-NEXT: [[ADD20:%.*]] = add i32 [[SUB19]], 1 +// CHECK17-NEXT: [[DIV21:%.*]] = udiv i32 [[ADD20]], 1 +// CHECK17-NEXT: [[MUL22:%.*]] = mul i32 [[DIV21]], 1 +// CHECK17-NEXT: [[ADD23:%.*]] = add i32 [[TMP39]], [[MUL22]] +// CHECK17-NEXT: store i32 [[ADD23]], i32* [[I5]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK17: .omp.final.done: // CHECK17-NEXT: br label [[OMP_PRECOND_END]] @@ -10041,7 +10223,7 @@ // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 -// CHECK17-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK17-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK17-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -10052,24 +10234,28 @@ // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK17-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]]) +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store %struct.S1* [[TMP0]], %struct.S1** [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK17-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i64 [[TMP1]], i64* [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP2]], i64* [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i16* [[TMP3]], i16** [[TMP9]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK17-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10079,76 +10265,79 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK17-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i16*, i16** [[TMP9]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !30 -// CHECK17-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK17-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !30 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !30 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !30 -// CHECK17-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double -// CHECK17-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 -// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK17-NEXT: store double [[ADD5]], double* [[A]], align 8, !llvm.access.group !30 -// CHECK17-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP13:%.*]] = load double, double* [[A6]], align 8, !llvm.access.group !30 -// CHECK17-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK17-NEXT: store double [[INC]], double* [[A6]], align 8, !llvm.access.group !30 -// CHECK17-NEXT: [[CONV7:%.*]] = fptosi double [[INC]] to i16 -// CHECK17-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP14]] -// CHECK17-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 -// CHECK17-NEXT: store i16 [[CONV7]], i16* [[ARRAYIDX8]], align 2, !llvm.access.group !30 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[B]], align 4, !llvm.access.group !30 +// CHECK17-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double +// CHECK17-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK17-NEXT: store double [[ADD2]], double* [[A]], align 8, !llvm.access.group !30 +// CHECK17-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP20:%.*]] = load double, double* [[A3]], align 8, !llvm.access.group !30 +// CHECK17-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK17-NEXT: store double [[INC]], double* [[A3]], align 8, !llvm.access.group !30 +// CHECK17-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// CHECK17-NEXT: [[TMP21:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i64 [[TMP21]] +// CHECK17-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// CHECK17-NEXT: store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2, !llvm.access.group !30 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK17-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK17-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK17-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10162,34 +10351,33 @@ // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK17-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK17-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP5]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10199,70 +10387,74 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK17-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !33 -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK17-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !33 -// CHECK17-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !33 -// CHECK17-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 -// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 -// CHECK17-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK17-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !33 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK17-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !33 +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !33 +// CHECK17-NEXT: [[TMP16:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !33 +// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK17-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK17-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2, !llvm.access.group !33 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 2 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 +// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK17-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK17-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK17-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10276,7 +10468,7 @@ // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK19-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) // CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -10285,20 +10477,20 @@ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK19-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK19-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK19-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined. to void (i32*, i32*, ...)*), i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK19-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 2 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10308,54 +10500,57 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK19-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK19-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK19-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK19-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK19-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, i32* [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10367,23 +10562,23 @@ // CHECK19-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK19-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP1]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK19-NEXT: store i16 [[TMP1]], i16* [[TMP0]], align 2 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10393,59 +10588,62 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK19-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK19-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !19 -// CHECK19-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 -// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 -// CHECK19-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK19-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !19 +// CHECK19-NEXT: [[TMP11:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !19 +// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK19-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 +// CHECK19-NEXT: store i16 [[CONV3]], i16* [[AA]], align 2, !llvm.access.group !19 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK19-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK19-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK19-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, i32* [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10458,29 +10656,28 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK19-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32 [[TMP1]], i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK19-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10490,63 +10687,68 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK19-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK19-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !22 -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK19-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !22 -// CHECK19-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !22 -// CHECK19-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 -// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 -// CHECK19-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK19-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !22 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK19-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[TMP14:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !22 +// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK19-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK19-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2, !llvm.access.group !22 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK19-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK19-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK19-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK19-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, i32* [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10566,7 +10768,7 @@ // CHECK19-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 // CHECK19-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -10584,27 +10786,36 @@ // CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 // CHECK19-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 // CHECK19-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 9, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [10 x float]*, i32, float*, [5 x [10 x double]]*, i32, i32, double*, %struct.TT*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP9]], [10 x float]* [[TMP0]], i32 [[TMP1]], float* [[TMP2]], [5 x [10 x double]]* [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], double* [[TMP6]], %struct.TT* [[TMP7]]) +// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store [10 x float]* [[TMP0]], [10 x float]** [[TMP10]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32 [[TMP1]], i32* [[TMP11]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store float* [[TMP2]], float** [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store [5 x [10 x double]]* [[TMP3]], [5 x [10 x double]]** [[TMP13]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[TMP14]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK19-NEXT: store i32 [[TMP5]], i32* [[TMP15]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK19-NEXT: store double* [[TMP6]], double** [[TMP16]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK19-NEXT: store %struct.TT* [[TMP7]], %struct.TT** [[TMP17]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4 -// CHECK19-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 -// CHECK19-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10614,107 +10825,111 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4 -// CHECK19-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4 -// CHECK19-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4 -// CHECK19-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 -// CHECK19-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load [10 x float]*, [10 x float]** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 6 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 7 +// CHECK19-NEXT: [[TMP16:%.*]] = load double*, double** [[TMP15]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 8 +// CHECK19-NEXT: [[TMP18:%.*]] = load %struct.TT*, %struct.TT** [[TMP17]], align 4 +// CHECK19-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i32 0, i32 0 // CHECK19-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[ARRAYDECAY]], i32 16) ] // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !25 -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK19-NEXT: store i32 [[ADD6]], i32* [[A_ADDR]], align 4, !llvm.access.group !25 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !25 -// CHECK19-NEXT: [[CONV:%.*]] = fpext float [[TMP17]] to double -// CHECK19-NEXT: [[ADD7:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK19-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP28:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[CONV:%.*]] = fpext float [[TMP28]] to double +// CHECK19-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK19-NEXT: [[CONV4:%.*]] = fptrunc double [[ADD3]] to float +// CHECK19-NEXT: store float [[CONV4]], float* [[ARRAYIDX]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP8]], i32 3 +// CHECK19-NEXT: [[TMP29:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[CONV6:%.*]] = fpext float [[TMP29]] to double +// CHECK19-NEXT: [[ADD7:%.*]] = fadd double [[CONV6]], 1.000000e+00 // CHECK19-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float -// CHECK19-NEXT: store float [[CONV8]], float* [[ARRAYIDX]], align 4, !llvm.access.group !25 -// CHECK19-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK19-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX9]], align 4, !llvm.access.group !25 -// CHECK19-NEXT: [[CONV10:%.*]] = fpext float [[TMP18]] to double -// CHECK19-NEXT: [[ADD11:%.*]] = fadd double [[CONV10]], 1.000000e+00 -// CHECK19-NEXT: [[CONV12:%.*]] = fptrunc double [[ADD11]] to float -// CHECK19-NEXT: store float [[CONV12]], float* [[ARRAYIDX9]], align 4, !llvm.access.group !25 -// CHECK19-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 -// CHECK19-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !25 -// CHECK19-NEXT: [[ADD15:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK19-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8, !llvm.access.group !25 -// CHECK19-NEXT: [[TMP20:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK19-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP20]] -// CHECK19-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX16]], i32 3 -// CHECK19-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !25 -// CHECK19-NEXT: [[ADD18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK19-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !25 -// CHECK19-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !25 -// CHECK19-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK19-NEXT: store i64 [[ADD19]], i64* [[X]], align 4, !llvm.access.group !25 -// CHECK19-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK19-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !25 -// CHECK19-NEXT: [[CONV20:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK19-NEXT: [[ADD21:%.*]] = add nsw i32 [[CONV20]], 1 -// CHECK19-NEXT: [[CONV22:%.*]] = trunc i32 [[ADD21]] to i8 -// CHECK19-NEXT: store i8 [[CONV22]], i8* [[Y]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: store float [[CONV8]], float* [[ARRAYIDX5]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP10]], i32 0, i32 1 +// CHECK19-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX9]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP30:%.*]] = load double, double* [[ARRAYIDX10]], align 8, !llvm.access.group !25 +// CHECK19-NEXT: [[ADD11:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK19-NEXT: store double [[ADD11]], double* [[ARRAYIDX10]], align 8, !llvm.access.group !25 +// CHECK19-NEXT: [[TMP31:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK19-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP16]], i32 [[TMP31]] +// CHECK19-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX12]], i32 3 +// CHECK19-NEXT: [[TMP32:%.*]] = load double, double* [[ARRAYIDX13]], align 8, !llvm.access.group !25 +// CHECK19-NEXT: [[ADD14:%.*]] = fadd double [[TMP32]], 1.000000e+00 +// CHECK19-NEXT: store double [[ADD14]], double* [[ARRAYIDX13]], align 8, !llvm.access.group !25 +// CHECK19-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP18]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP33:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP33]], 1 +// CHECK19-NEXT: store i64 [[ADD15]], i64* [[X]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP18]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP34:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[CONV16:%.*]] = sext i8 [[TMP34]] to i32 +// CHECK19-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV16]], 1 +// CHECK19-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK19-NEXT: store i8 [[CONV18]], i8* [[Y]], align 4, !llvm.access.group !25 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK19-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK19-NEXT: store i32 [[ADD23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK19-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]]) -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK19-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) +// CHECK19-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK19-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, i32* [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10730,10 +10945,7 @@ // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 @@ -10742,155 +10954,163 @@ // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* // CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK19-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK19-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 -// CHECK19-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* -// CHECK19-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, [10 x i32]*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]], [10 x i32]* [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK19-NEXT: store i16 [[TMP6]], i16* [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV1]], align 1 +// CHECK19-NEXT: store i8 [[TMP8]], i8* [[TMP7]], align 2 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP9]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[N:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK19-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] -// CHECK19-NEXT: [[SUB4:%.*]] = sub i32 [[SUB]], 1 -// CHECK19-NEXT: [[ADD:%.*]] = add i32 [[SUB4]], 1 +// CHECK19-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[N]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i16, i16* [[TMP5]], align 4 +// CHECK19-NEXT: store i16 [[TMP6]], i16* [[AA]], align 2 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP7]], align 2 +// CHECK19-NEXT: store i8 [[TMP8]], i8* [[AAA]], align 1 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[A]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK19-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK19-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 -// CHECK19-NEXT: [[SUB5:%.*]] = sub i32 [[DIV]], 1 -// CHECK19-NEXT: store i32 [[SUB5]], i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK19-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], i32* [[I]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK19-NEXT: [[CMP7:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] -// CHECK19-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] +// CHECK19-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 -// CHECK19-NEXT: [[ADD8:%.*]] = add i32 [[TMP17]], 1 -// CHECK19-NEXT: [[CMP9:%.*]] = icmp ult i32 [[TMP16]], [[ADD8]] -// CHECK19-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 +// CHECK19-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK19-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] +// CHECK19-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !28 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK19-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK19-NEXT: [[ADD10:%.*]] = add i32 [[TMP18]], [[MUL]] -// CHECK19-NEXT: store i32 [[ADD10]], i32* [[I6]], align 4, !llvm.access.group !28 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !28 -// CHECK19-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK19-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4, !llvm.access.group !28 -// CHECK19-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !28 -// CHECK19-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 -// CHECK19-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 -// CHECK19-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK19-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !28 -// CHECK19-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1, !llvm.access.group !28 -// CHECK19-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 -// CHECK19-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 -// CHECK19-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK19-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1, !llvm.access.group !28 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 -// CHECK19-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK19-NEXT: store i32 [[ADD18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !28 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK19-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK19-NEXT: [[ADD9:%.*]] = add i32 [[TMP28]], [[MUL]] +// CHECK19-NEXT: store i32 [[ADD9]], i32* [[I5]], align 4, !llvm.access.group !28 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !28 +// CHECK19-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK19-NEXT: store i32 [[ADD10]], i32* [[A]], align 4, !llvm.access.group !28 +// CHECK19-NEXT: [[TMP31:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !28 +// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 +// CHECK19-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK19-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 +// CHECK19-NEXT: store i16 [[CONV12]], i16* [[AA]], align 2, !llvm.access.group !28 +// CHECK19-NEXT: [[TMP32:%.*]] = load i8, i8* [[AAA]], align 1, !llvm.access.group !28 +// CHECK19-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 +// CHECK19-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 +// CHECK19-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 +// CHECK19-NEXT: store i8 [[CONV15]], i8* [[AAA]], align 1, !llvm.access.group !28 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP10]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 +// CHECK19-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK19-NEXT: store i32 [[ADD16]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK19-NEXT: [[ADD19:%.*]] = add i32 [[TMP24]], 1 -// CHECK19-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK19-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 +// CHECK19-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK19-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK19-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK19-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB20:%.*]] = sub i32 [[TMP30]], [[TMP31]] -// CHECK19-NEXT: [[SUB21:%.*]] = sub i32 [[SUB20]], 1 -// CHECK19-NEXT: [[ADD22:%.*]] = add i32 [[SUB21]], 1 -// CHECK19-NEXT: [[DIV23:%.*]] = udiv i32 [[ADD22]], 1 -// CHECK19-NEXT: [[MUL24:%.*]] = mul i32 [[DIV23]], 1 -// CHECK19-NEXT: [[ADD25:%.*]] = add i32 [[TMP29]], [[MUL24]] -// CHECK19-NEXT: store i32 [[ADD25]], i32* [[I6]], align 4 +// CHECK19-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB18:%.*]] = sub i32 [[TMP40]], [[TMP41]] +// CHECK19-NEXT: [[SUB19:%.*]] = sub i32 [[SUB18]], 1 +// CHECK19-NEXT: [[ADD20:%.*]] = add i32 [[SUB19]], 1 +// CHECK19-NEXT: [[DIV21:%.*]] = udiv i32 [[ADD20]], 1 +// CHECK19-NEXT: [[MUL22:%.*]] = mul i32 [[DIV21]], 1 +// CHECK19-NEXT: [[ADD23:%.*]] = add i32 [[TMP39]], [[MUL22]] +// CHECK19-NEXT: store i32 [[ADD23]], i32* [[I5]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK19: .omp.final.done: // CHECK19-NEXT: br label [[OMP_PRECOND_END]] @@ -10906,7 +11126,7 @@ // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 -// CHECK19-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK19-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK19-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -10916,23 +11136,28 @@ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 // CHECK19-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]]) +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store %struct.S1* [[TMP0]], %struct.S1** [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32 [[TMP1]], i32* [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store i16* [[TMP3]], i16** [[TMP9]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK19-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10942,75 +11167,79 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i16*, i16** [[TMP9]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 -// CHECK19-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK19-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !31 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[B_ADDR]], align 4, !llvm.access.group !31 -// CHECK19-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double -// CHECK19-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK19-NEXT: store double [[ADD4]], double* [[A]], align 4, !llvm.access.group !31 -// CHECK19-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP13:%.*]] = load double, double* [[A5]], align 4, !llvm.access.group !31 -// CHECK19-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK19-NEXT: store double [[INC]], double* [[A5]], align 4, !llvm.access.group !31 -// CHECK19-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 -// CHECK19-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP14]] -// CHECK19-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 -// CHECK19-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2, !llvm.access.group !31 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[B]], align 4, !llvm.access.group !31 +// CHECK19-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double +// CHECK19-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK19-NEXT: store double [[ADD2]], double* [[A]], align 4, !llvm.access.group !31 +// CHECK19-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP20:%.*]] = load double, double* [[A3]], align 4, !llvm.access.group !31 +// CHECK19-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK19-NEXT: store double [[INC]], double* [[A3]], align 4, !llvm.access.group !31 +// CHECK19-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// CHECK19-NEXT: [[TMP21:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i32 [[TMP21]] +// CHECK19-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// CHECK19-NEXT: store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2, !llvm.access.group !31 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK19-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK19-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK19-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, i32* [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11024,32 +11253,32 @@ // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK19-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK19-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK19-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP5]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11059,69 +11288,74 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK19-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !34 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !34 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !34 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !34 -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK19-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !34 -// CHECK19-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !34 -// CHECK19-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 -// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 -// CHECK19-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK19-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !34 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !34 -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK19-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !34 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !34 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK19-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !34 +// CHECK19-NEXT: [[TMP16:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !34 +// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK19-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK19-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2, !llvm.access.group !34 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !34 +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !34 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK19-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK19-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK19-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK19-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, i32* [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11135,7 +11369,7 @@ // CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK21-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) // CHECK21-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK21-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -11146,20 +11380,20 @@ // CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK21-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK21-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK21-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK21-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 -// CHECK21-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP4]]) +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK21-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 2 +// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK21-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11169,54 +11403,57 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK21-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK21-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK21-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK21-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK21-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK21-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 10, i32* [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11228,23 +11465,23 @@ // CHECK21-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK21-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK21-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK21-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK21-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 -// CHECK21-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK21-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK21-NEXT: store i16 [[TMP1]], i16* [[TMP0]], align 2 +// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK21-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11254,59 +11491,62 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK21-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK21-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK21-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK21-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !18 -// CHECK21-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 -// CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 -// CHECK21-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK21-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !18 +// CHECK21-NEXT: [[TMP11:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !18 +// CHECK21-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 +// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK21-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 +// CHECK21-NEXT: store i16 [[CONV3]], i16* [[AA]], align 2, !llvm.access.group !18 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK21-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK21-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK21-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK21-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK21-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK21-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 10, i32* [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11319,31 +11559,29 @@ // CHECK21-NEXT: entry: // CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK21-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK21-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK21-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK21-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK21-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK21-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 -// CHECK21-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK21-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK21-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 -// CHECK21-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP3]]) +// CHECK21-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK21-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK21-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK21-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK21-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11353,64 +11591,68 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK21-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK21-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK21-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK21-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK21-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 -// CHECK21-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] -// CHECK21-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !21 -// CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK21-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !21 -// CHECK21-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !21 -// CHECK21-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 -// CHECK21-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 -// CHECK21-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK21-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !21 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !21 +// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK21-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !21 +// CHECK21-NEXT: [[TMP14:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !21 +// CHECK21-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK21-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK21-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2, !llvm.access.group !21 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK21-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK21-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK21-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK21-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK21-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK21-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK21-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 10, i32* [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11430,7 +11672,7 @@ // CHECK21-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 // CHECK21-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 -// CHECK21-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK21-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK21-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 // CHECK21-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -11449,28 +11691,36 @@ // CHECK21-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK21-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK21-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK21-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK21-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 -// CHECK21-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 9, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, [10 x float]*, i64, float*, [5 x [10 x double]]*, i64, i64, double*, %struct.TT*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], [10 x float]* [[TMP0]], i64 [[TMP1]], float* [[TMP2]], [5 x [10 x double]]* [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], double* [[TMP6]], %struct.TT* [[TMP7]]) +// CHECK21-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK21-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 8 +// CHECK21-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: store [10 x float]* [[TMP0]], [10 x float]** [[TMP10]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK21-NEXT: store i64 [[TMP1]], i64* [[TMP11]], align 8 +// CHECK21-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK21-NEXT: store float* [[TMP2]], float** [[TMP12]], align 8 +// CHECK21-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK21-NEXT: store [5 x [10 x double]]* [[TMP3]], [5 x [10 x double]]** [[TMP13]], align 8 +// CHECK21-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK21-NEXT: store i64 [[TMP4]], i64* [[TMP14]], align 8 +// CHECK21-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK21-NEXT: store i64 [[TMP5]], i64* [[TMP15]], align 8 +// CHECK21-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK21-NEXT: store double* [[TMP6]], double** [[TMP16]], align 8 +// CHECK21-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK21-NEXT: store %struct.TT* [[TMP7]], %struct.TT** [[TMP17]], align 8 +// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8 -// CHECK21-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8 -// CHECK21-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8 -// CHECK21-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 -// CHECK21-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK21-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11480,108 +11730,111 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK21-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK21-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8 -// CHECK21-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK21-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8 -// CHECK21-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8 -// CHECK21-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8 -// CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK21-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8 -// CHECK21-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK21-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 -// CHECK21-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 -// CHECK21-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK21-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 0 +// CHECK21-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK21-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load [10 x float]*, [10 x float]** [[TMP3]], align 8 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK21-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 8 +// CHECK21-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK21-NEXT: [[TMP10:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[TMP9]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 5 +// CHECK21-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP11]], align 8 +// CHECK21-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 6 +// CHECK21-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP13]], align 8 +// CHECK21-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 7 +// CHECK21-NEXT: [[TMP16:%.*]] = load double*, double** [[TMP15]], align 8 +// CHECK21-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 8 +// CHECK21-NEXT: [[TMP18:%.*]] = load %struct.TT*, %struct.TT** [[TMP17]], align 8 +// CHECK21-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i64 0, i64 0 // CHECK21-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[ARRAYDECAY]], i64 16) ] // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9 +// CHECK21-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK21-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 -// CHECK21-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK21-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK21-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK21-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK21-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 -// CHECK21-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !24 -// CHECK21-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK21-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 4, !llvm.access.group !24 -// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK21-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !24 -// CHECK21-NEXT: [[CONV7:%.*]] = fpext float [[TMP17]] to double -// CHECK21-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 -// CHECK21-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK21-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !24 -// CHECK21-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK21-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !24 -// CHECK21-NEXT: [[CONV11:%.*]] = fpext float [[TMP18]] to double -// CHECK21-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK21-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK21-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !24 -// CHECK21-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 -// CHECK21-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i64 0, i64 2 -// CHECK21-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !24 -// CHECK21-NEXT: [[ADD16:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK21-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !24 -// CHECK21-NEXT: [[TMP20:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK21-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP20]] -// CHECK21-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i64 3 -// CHECK21-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !24 -// CHECK21-NEXT: [[ADD19:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK21-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !24 -// CHECK21-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK21-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !24 -// CHECK21-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK21-NEXT: store i64 [[ADD20]], i64* [[X]], align 8, !llvm.access.group !24 -// CHECK21-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK21-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !24 -// CHECK21-NEXT: [[CONV21:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK21-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK21-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK21-NEXT: store i8 [[CONV23]], i8* [[Y]], align 8, !llvm.access.group !24 +// CHECK21-NEXT: [[TMP27:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !24 +// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK21-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !24 +// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i64 0, i64 2 +// CHECK21-NEXT: [[TMP28:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !24 +// CHECK21-NEXT: [[CONV:%.*]] = fpext float [[TMP28]] to double +// CHECK21-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK21-NEXT: [[CONV4:%.*]] = fptrunc double [[ADD3]] to float +// CHECK21-NEXT: store float [[CONV4]], float* [[ARRAYIDX]], align 4, !llvm.access.group !24 +// CHECK21-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 3 +// CHECK21-NEXT: [[TMP29:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !24 +// CHECK21-NEXT: [[CONV6:%.*]] = fpext float [[TMP29]] to double +// CHECK21-NEXT: [[ADD7:%.*]] = fadd double [[CONV6]], 1.000000e+00 +// CHECK21-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float +// CHECK21-NEXT: store float [[CONV8]], float* [[ARRAYIDX5]], align 4, !llvm.access.group !24 +// CHECK21-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP10]], i64 0, i64 1 +// CHECK21-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX9]], i64 0, i64 2 +// CHECK21-NEXT: [[TMP30:%.*]] = load double, double* [[ARRAYIDX10]], align 8, !llvm.access.group !24 +// CHECK21-NEXT: [[ADD11:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK21-NEXT: store double [[ADD11]], double* [[ARRAYIDX10]], align 8, !llvm.access.group !24 +// CHECK21-NEXT: [[TMP31:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK21-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP16]], i64 [[TMP31]] +// CHECK21-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX12]], i64 3 +// CHECK21-NEXT: [[TMP32:%.*]] = load double, double* [[ARRAYIDX13]], align 8, !llvm.access.group !24 +// CHECK21-NEXT: [[ADD14:%.*]] = fadd double [[TMP32]], 1.000000e+00 +// CHECK21-NEXT: store double [[ADD14]], double* [[ARRAYIDX13]], align 8, !llvm.access.group !24 +// CHECK21-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP18]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP33:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !24 +// CHECK21-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP33]], 1 +// CHECK21-NEXT: store i64 [[ADD15]], i64* [[X]], align 8, !llvm.access.group !24 +// CHECK21-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP18]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP34:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !24 +// CHECK21-NEXT: [[CONV16:%.*]] = sext i8 [[TMP34]] to i32 +// CHECK21-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV16]], 1 +// CHECK21-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK21-NEXT: store i8 [[CONV18]], i8* [[Y]], align 8, !llvm.access.group !24 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK21-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK21-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK21-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK21-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK21-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]]) -// CHECK21-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK21-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) +// CHECK21-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK21-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 10, i32* [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11597,10 +11850,7 @@ // CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK21-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK21-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK21-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK21-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 @@ -11611,159 +11861,163 @@ // CHECK21-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK21-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK21-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK21-NEXT: [[CONV4:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK21-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 -// CHECK21-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK21-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK21-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK21-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 -// CHECK21-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK21-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 -// CHECK21-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK21-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 -// CHECK21-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* -// CHECK21-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 -// CHECK21-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 -// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, [10 x i32]*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], [10 x i32]* [[TMP0]]) +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK21-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK21-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP6:%.*]] = load i16, i16* [[CONV2]], align 2 +// CHECK21-NEXT: store i16 [[TMP6]], i16* [[TMP5]], align 8 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK21-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV3]], align 1 +// CHECK21-NEXT: store i8 [[TMP8]], i8* [[TMP7]], align 2 +// CHECK21-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK21-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP9]], align 8 +// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[N:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK21-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK21-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK21-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK21-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK21-NEXT: [[I8:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 -// CHECK21-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK21-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK21-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK21-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* -// CHECK21-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK21-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK21-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK21-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] -// CHECK21-NEXT: [[SUB6:%.*]] = sub i32 [[SUB]], 1 -// CHECK21-NEXT: [[ADD:%.*]] = add i32 [[SUB6]], 1 +// CHECK21-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK21-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK21-NEXT: store i32 [[TMP4]], i32* [[N]], align 4 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP6:%.*]] = load i16, i16* [[TMP5]], align 8 +// CHECK21-NEXT: store i16 [[TMP6]], i16* [[AA]], align 2 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK21-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP7]], align 2 +// CHECK21-NEXT: store i8 [[TMP8]], i8* [[AAA]], align 1 +// CHECK21-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK21-NEXT: [[TMP10:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP9]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[A]], align 4 +// CHECK21-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, i32* [[N]], align 4 +// CHECK21-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK21-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK21-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] +// CHECK21-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK21-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK21-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 -// CHECK21-NEXT: [[SUB7:%.*]] = sub i32 [[DIV]], 1 -// CHECK21-NEXT: store i32 [[SUB7]], i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK21-NEXT: store i32 [[TMP5]], i32* [[I]], align 4 -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK21-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK21-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK21-NEXT: store i32 [[TMP15]], i32* [[I]], align 4 +// CHECK21-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK21-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK21-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK21: omp.precond.then: // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK21-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK21-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK21-NEXT: [[CMP9:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] -// CHECK21-NEXT: br i1 [[CMP9]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK21-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK21-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] +// CHECK21-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK21-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK21-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 -// CHECK21-NEXT: [[ADD10:%.*]] = add i32 [[TMP17]], 1 -// CHECK21-NEXT: [[CMP11:%.*]] = icmp ult i32 [[TMP16]], [[ADD10]] -// CHECK21-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK21-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK21-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK21-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK21-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] +// CHECK21-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !27 -// CHECK21-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK21-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK21-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] -// CHECK21-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4, !llvm.access.group !27 -// CHECK21-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !27 -// CHECK21-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK21-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 4, !llvm.access.group !27 -// CHECK21-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2, !llvm.access.group !27 -// CHECK21-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 -// CHECK21-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 -// CHECK21-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK21-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2, !llvm.access.group !27 -// CHECK21-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1, !llvm.access.group !27 -// CHECK21-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 -// CHECK21-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 -// CHECK21-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK21-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1, !llvm.access.group !27 -// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK21-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 -// CHECK21-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK21-NEXT: store i32 [[ADD20]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 +// CHECK21-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !27 +// CHECK21-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK21-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK21-NEXT: [[ADD9:%.*]] = add i32 [[TMP28]], [[MUL]] +// CHECK21-NEXT: store i32 [[ADD9]], i32* [[I5]], align 4, !llvm.access.group !27 +// CHECK21-NEXT: [[TMP30:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !27 +// CHECK21-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK21-NEXT: store i32 [[ADD10]], i32* [[A]], align 4, !llvm.access.group !27 +// CHECK21-NEXT: [[TMP31:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !27 +// CHECK21-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 +// CHECK21-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK21-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 +// CHECK21-NEXT: store i16 [[CONV12]], i16* [[AA]], align 2, !llvm.access.group !27 +// CHECK21-NEXT: [[TMP32:%.*]] = load i8, i8* [[AAA]], align 1, !llvm.access.group !27 +// CHECK21-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 +// CHECK21-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 +// CHECK21-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 +// CHECK21-NEXT: store i8 [[CONV15]], i8* [[AAA]], align 1, !llvm.access.group !27 +// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP10]], i64 0, i64 2 +// CHECK21-NEXT: [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 +// CHECK21-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK21-NEXT: store i32 [[ADD16]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK21-NEXT: [[ADD21:%.*]] = add i32 [[TMP24]], 1 -// CHECK21-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK21-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK21-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 +// CHECK21-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK21-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK21-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK21-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK21-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: -// CHECK21-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK21-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK21-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK21-NEXT: [[SUB22:%.*]] = sub i32 [[TMP30]], [[TMP31]] -// CHECK21-NEXT: [[SUB23:%.*]] = sub i32 [[SUB22]], 1 -// CHECK21-NEXT: [[ADD24:%.*]] = add i32 [[SUB23]], 1 -// CHECK21-NEXT: [[DIV25:%.*]] = udiv i32 [[ADD24]], 1 -// CHECK21-NEXT: [[MUL26:%.*]] = mul i32 [[DIV25]], 1 -// CHECK21-NEXT: [[ADD27:%.*]] = add i32 [[TMP29]], [[MUL26]] -// CHECK21-NEXT: store i32 [[ADD27]], i32* [[I8]], align 4 +// CHECK21-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK21-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK21-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK21-NEXT: [[SUB18:%.*]] = sub i32 [[TMP40]], [[TMP41]] +// CHECK21-NEXT: [[SUB19:%.*]] = sub i32 [[SUB18]], 1 +// CHECK21-NEXT: [[ADD20:%.*]] = add i32 [[SUB19]], 1 +// CHECK21-NEXT: [[DIV21:%.*]] = udiv i32 [[ADD20]], 1 +// CHECK21-NEXT: [[MUL22:%.*]] = mul i32 [[DIV21]], 1 +// CHECK21-NEXT: [[ADD23:%.*]] = add i32 [[TMP39]], [[MUL22]] +// CHECK21-NEXT: store i32 [[ADD23]], i32* [[I5]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK21: .omp.final.done: // CHECK21-NEXT: br label [[OMP_PRECOND_END]] @@ -11780,8 +12034,7 @@ // CHECK21-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 // CHECK21-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK21-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK21-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK21-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -11794,31 +12047,34 @@ // CHECK21-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK21-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 // CHECK21-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK21-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK21-NEXT: store i32 [[TMP4]], i32* [[CONV4]], align 4 -// CHECK21-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK21-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV3]], align 1 -// CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 -// CHECK21-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK21-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: store %struct.S1* [[TMP0]], %struct.S1** [[TMP4]], align 8 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK21-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 8 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK21-NEXT: store i64 [[TMP1]], i64* [[TMP7]], align 8 +// CHECK21-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK21-NEXT: store i64 [[TMP2]], i64* [[TMP8]], align 8 +// CHECK21-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK21-NEXT: store i16* [[TMP3]], i16** [[TMP9]], align 8 +// CHECK21-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK21-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV3]], align 1 +// CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK21-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK21-NEXT: store i8 [[FROMBOOL]], i8* [[CONV5]], align 1 -// CHECK21-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]], i64 [[TMP7]]) +// CHECK21-NEXT: store i8 [[FROMBOOL]], i8* [[TMP10]], align 8 +// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK21-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 -// CHECK21-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 -// CHECK21-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK21-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11828,120 +12084,126 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK21-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK21-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK21-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK21-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK21-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK21-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK21-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK21-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK21-NEXT: [[TMP10:%.*]] = load i16*, i16** [[TMP9]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 6 +// CHECK21-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP11]], align 8 +// CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 +// CHECK21-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK21-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 9 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 1 -// CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP9]] to i1 -// CHECK21-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK21-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP18:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK21-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK21: omp_if.then: // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !30 -// CHECK21-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK21-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK21-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK21-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !30 +// CHECK21-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK21-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK21-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !30 -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !30 -// CHECK21-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP13]] to double -// CHECK21-NEXT: [[ADD6:%.*]] = fadd double [[CONV5]], 1.500000e+00 -// CHECK21-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK21-NEXT: store double [[ADD6]], double* [[A]], align 8, !llvm.access.group !30 -// CHECK21-NEXT: [[A7:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK21-NEXT: [[TMP14:%.*]] = load double, double* [[A7]], align 8, !llvm.access.group !30 -// CHECK21-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK21-NEXT: store double [[INC]], double* [[A7]], align 8, !llvm.access.group !30 -// CHECK21-NEXT: [[CONV8:%.*]] = fptosi double [[INC]] to i16 -// CHECK21-NEXT: [[TMP15:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP15]] -// CHECK21-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 -// CHECK21-NEXT: store i16 [[CONV8]], i16* [[ARRAYIDX9]], align 2, !llvm.access.group !30 +// CHECK21-NEXT: [[TMP22:%.*]] = load i32, i32* [[B]], align 4, !llvm.access.group !30 +// CHECK21-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP22]] to double +// CHECK21-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK21-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK21-NEXT: store double [[ADD3]], double* [[A]], align 8, !llvm.access.group !30 +// CHECK21-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP23:%.*]] = load double, double* [[A4]], align 8, !llvm.access.group !30 +// CHECK21-NEXT: [[INC:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// CHECK21-NEXT: store double [[INC]], double* [[A4]], align 8, !llvm.access.group !30 +// CHECK21-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 +// CHECK21-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i64 [[TMP24]] +// CHECK21-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// CHECK21-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2, !llvm.access.group !30 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK21-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK21-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK21-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK21-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK21-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_IF_END:%.*]] // CHECK21: omp_if.else: -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND11:%.*]] -// CHECK21: omp.inner.for.cond11: -// CHECK21-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK21-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY13:%.*]], label [[OMP_INNER_FOR_END27:%.*]] -// CHECK21: omp.inner.for.body13: -// CHECK21-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[MUL14:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK21-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] -// CHECK21-NEXT: store i32 [[ADD15]], i32* [[I]], align 4 -// CHECK21-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK21-NEXT: [[CONV16:%.*]] = sitofp i32 [[TMP20]] to double -// CHECK21-NEXT: [[ADD17:%.*]] = fadd double [[CONV16]], 1.500000e+00 -// CHECK21-NEXT: [[A18:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK21-NEXT: store double [[ADD17]], double* [[A18]], align 8 -// CHECK21-NEXT: [[A19:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK21-NEXT: [[TMP21:%.*]] = load double, double* [[A19]], align 8 -// CHECK21-NEXT: [[INC20:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK21-NEXT: store double [[INC20]], double* [[A19]], align 8 -// CHECK21-NEXT: [[CONV21:%.*]] = fptosi double [[INC20]] to i16 -// CHECK21-NEXT: [[TMP22:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK21-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP22]] -// CHECK21-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX22]], i64 1 -// CHECK21-NEXT: store i16 [[CONV21]], i16* [[ARRAYIDX23]], align 2 -// CHECK21-NEXT: br label [[OMP_BODY_CONTINUE24:%.*]] -// CHECK21: omp.body.continue24: -// CHECK21-NEXT: br label [[OMP_INNER_FOR_INC25:%.*]] -// CHECK21: omp.inner.for.inc25: -// CHECK21-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK21-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND11]], !llvm.loop [[LOOP33:![0-9]+]] -// CHECK21: omp.inner.for.end27: +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] +// CHECK21: omp.inner.for.cond8: +// CHECK21-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK21-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END24:%.*]] +// CHECK21: omp.inner.for.body10: +// CHECK21-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[MUL11:%.*]] = mul nsw i32 [[TMP28]], 1 +// CHECK21-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK21-NEXT: store i32 [[ADD12]], i32* [[I]], align 4 +// CHECK21-NEXT: [[TMP29:%.*]] = load i32, i32* [[B]], align 4 +// CHECK21-NEXT: [[CONV13:%.*]] = sitofp i32 [[TMP29]] to double +// CHECK21-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.500000e+00 +// CHECK21-NEXT: [[A15:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK21-NEXT: store double [[ADD14]], double* [[A15]], align 8 +// CHECK21-NEXT: [[A16:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP30:%.*]] = load double, double* [[A16]], align 8 +// CHECK21-NEXT: [[INC17:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK21-NEXT: store double [[INC17]], double* [[A16]], align 8 +// CHECK21-NEXT: [[CONV18:%.*]] = fptosi double [[INC17]] to i16 +// CHECK21-NEXT: [[TMP31:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK21-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i64 [[TMP31]] +// CHECK21-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX19]], i64 1 +// CHECK21-NEXT: store i16 [[CONV18]], i16* [[ARRAYIDX20]], align 2 +// CHECK21-NEXT: br label [[OMP_BODY_CONTINUE21:%.*]] +// CHECK21: omp.body.continue21: +// CHECK21-NEXT: br label [[OMP_INNER_FOR_INC22:%.*]] +// CHECK21: omp.inner.for.inc22: +// CHECK21-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK21-NEXT: store i32 [[ADD23]], i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK21: omp.inner.for.end24: // CHECK21-NEXT: br label [[OMP_IF_END]] // CHECK21: omp_if.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK21-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK21-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]]) +// CHECK21-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK21-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 10, i32* [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11955,34 +12217,33 @@ // CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK21-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK21-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK21-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK21-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 // CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK21-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK21-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK21-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK21-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 -// CHECK21-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK21-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* -// CHECK21-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 -// CHECK21-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]]) +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK21-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK21-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK21-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP5]], align 8 +// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 +// CHECK21-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11992,70 +12253,74 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK21-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK21-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK21-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK21-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK21-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK21-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK21-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 -// CHECK21-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK21-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK21-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !35 -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !35 -// CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK21-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !35 -// CHECK21-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !35 -// CHECK21-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 -// CHECK21-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 -// CHECK21-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK21-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !35 -// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !35 -// CHECK21-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK21-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !35 +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !35 +// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK21-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !35 +// CHECK21-NEXT: [[TMP16:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !35 +// CHECK21-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK21-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK21-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2, !llvm.access.group !35 +// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 2 +// CHECK21-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !35 +// CHECK21-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK21-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !35 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK21-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK21-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK21-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK21-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK21-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK21-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK21-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK21-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 10, i32* [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12069,7 +12334,7 @@ // CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK23-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) // CHECK23-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK23-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -12078,20 +12343,20 @@ // CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK23-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK23-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK23-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK23-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined. to void (i32*, i32*, ...)*), i32 [[TMP4]]) +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK23-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 2 +// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK23-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12101,54 +12366,57 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK23-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK23-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK23-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK23-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK23-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK23-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK23-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK23-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 10, i32* [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12160,23 +12428,23 @@ // CHECK23-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK23-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK23-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK23-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK23-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK23-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP1]]) +// CHECK23-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK23-NEXT: store i16 [[TMP1]], i16* [[TMP0]], align 2 +// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK23-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12186,59 +12454,62 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK23-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK23-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK23-NEXT: store i16 [[TMP2]], i16* [[AA]], align 2 // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK23-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 -// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK23-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 -// CHECK23-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !19 -// CHECK23-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 -// CHECK23-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 -// CHECK23-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK23-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !19 +// CHECK23-NEXT: [[TMP11:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !19 +// CHECK23-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 +// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK23-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 +// CHECK23-NEXT: store i16 [[CONV3]], i16* [[AA]], align 2, !llvm.access.group !19 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK23-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK23-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK23-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK23-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK23-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK23-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 10, i32* [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12251,29 +12522,28 @@ // CHECK23-NEXT: entry: // CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK23-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK23-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK23-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK23-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK23-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK23-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32 [[TMP1]], i32 [[TMP3]]) +// CHECK23-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK23-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK23-NEXT: store i16 [[TMP3]], i16* [[TMP2]], align 4 +// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK23-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12283,63 +12553,68 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK23-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK23-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK23-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK23-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK23-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 -// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK23-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !22 -// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK23-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !22 -// CHECK23-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !22 -// CHECK23-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 -// CHECK23-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 -// CHECK23-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK23-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !22 +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !22 +// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK23-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !22 +// CHECK23-NEXT: [[TMP14:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !22 +// CHECK23-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK23-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK23-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK23-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2, !llvm.access.group !22 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK23-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK23-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK23-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK23-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK23-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK23-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK23-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 10, i32* [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12359,7 +12634,7 @@ // CHECK23-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 // CHECK23-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 -// CHECK23-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK23-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK23-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 // CHECK23-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -12377,27 +12652,36 @@ // CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 // CHECK23-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 // CHECK23-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP8]], i32* [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 9, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [10 x float]*, i32, float*, [5 x [10 x double]]*, i32, i32, double*, %struct.TT*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP9]], [10 x float]* [[TMP0]], i32 [[TMP1]], float* [[TMP2]], [5 x [10 x double]]* [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], double* [[TMP6]], %struct.TT* [[TMP7]]) +// CHECK23-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: store [10 x float]* [[TMP0]], [10 x float]** [[TMP10]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: store i32 [[TMP1]], i32* [[TMP11]], align 4 +// CHECK23-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK23-NEXT: store float* [[TMP2]], float** [[TMP12]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK23-NEXT: store [5 x [10 x double]]* [[TMP3]], [5 x [10 x double]]** [[TMP13]], align 4 +// CHECK23-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK23-NEXT: store i32 [[TMP4]], i32* [[TMP14]], align 4 +// CHECK23-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK23-NEXT: store i32 [[TMP5]], i32* [[TMP15]], align 4 +// CHECK23-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK23-NEXT: store double* [[TMP6]], double** [[TMP16]], align 4 +// CHECK23-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK23-NEXT: store %struct.TT* [[TMP7]], %struct.TT** [[TMP17]], align 4 +// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4 -// CHECK23-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4 -// CHECK23-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4 -// CHECK23-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 -// CHECK23-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK23-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12407,107 +12691,111 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK23-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK23-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4 -// CHECK23-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK23-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4 -// CHECK23-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4 -// CHECK23-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 -// CHECK23-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 -// CHECK23-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK23-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load [10 x float]*, [10 x float]** [[TMP3]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK23-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[TMP9]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 5 +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 6 +// CHECK23-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK23-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 7 +// CHECK23-NEXT: [[TMP16:%.*]] = load double*, double** [[TMP15]], align 4 +// CHECK23-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 8 +// CHECK23-NEXT: [[TMP18:%.*]] = load %struct.TT*, %struct.TT** [[TMP17]], align 4 +// CHECK23-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i32 0, i32 0 // CHECK23-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[ARRAYDECAY]], i32 16) ] // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9 +// CHECK23-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK23-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 -// CHECK23-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK23-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK23-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK23-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK23-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK23-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 -// CHECK23-NEXT: [[TMP16:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !25 -// CHECK23-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK23-NEXT: store i32 [[ADD6]], i32* [[A_ADDR]], align 4, !llvm.access.group !25 -// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK23-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !25 -// CHECK23-NEXT: [[CONV:%.*]] = fpext float [[TMP17]] to double -// CHECK23-NEXT: [[ADD7:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK23-NEXT: [[TMP27:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !25 +// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK23-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !25 +// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP4]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP28:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !25 +// CHECK23-NEXT: [[CONV:%.*]] = fpext float [[TMP28]] to double +// CHECK23-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK23-NEXT: [[CONV4:%.*]] = fptrunc double [[ADD3]] to float +// CHECK23-NEXT: store float [[CONV4]], float* [[ARRAYIDX]], align 4, !llvm.access.group !25 +// CHECK23-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP8]], i32 3 +// CHECK23-NEXT: [[TMP29:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !25 +// CHECK23-NEXT: [[CONV6:%.*]] = fpext float [[TMP29]] to double +// CHECK23-NEXT: [[ADD7:%.*]] = fadd double [[CONV6]], 1.000000e+00 // CHECK23-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float -// CHECK23-NEXT: store float [[CONV8]], float* [[ARRAYIDX]], align 4, !llvm.access.group !25 -// CHECK23-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK23-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX9]], align 4, !llvm.access.group !25 -// CHECK23-NEXT: [[CONV10:%.*]] = fpext float [[TMP18]] to double -// CHECK23-NEXT: [[ADD11:%.*]] = fadd double [[CONV10]], 1.000000e+00 -// CHECK23-NEXT: [[CONV12:%.*]] = fptrunc double [[ADD11]] to float -// CHECK23-NEXT: store float [[CONV12]], float* [[ARRAYIDX9]], align 4, !llvm.access.group !25 -// CHECK23-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 -// CHECK23-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2 -// CHECK23-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !25 -// CHECK23-NEXT: [[ADD15:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK23-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8, !llvm.access.group !25 -// CHECK23-NEXT: [[TMP20:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK23-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP20]] -// CHECK23-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX16]], i32 3 -// CHECK23-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !25 -// CHECK23-NEXT: [[ADD18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK23-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !25 -// CHECK23-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK23-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !25 -// CHECK23-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK23-NEXT: store i64 [[ADD19]], i64* [[X]], align 4, !llvm.access.group !25 -// CHECK23-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK23-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !25 -// CHECK23-NEXT: [[CONV20:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK23-NEXT: [[ADD21:%.*]] = add nsw i32 [[CONV20]], 1 -// CHECK23-NEXT: [[CONV22:%.*]] = trunc i32 [[ADD21]] to i8 -// CHECK23-NEXT: store i8 [[CONV22]], i8* [[Y]], align 4, !llvm.access.group !25 +// CHECK23-NEXT: store float [[CONV8]], float* [[ARRAYIDX5]], align 4, !llvm.access.group !25 +// CHECK23-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP10]], i32 0, i32 1 +// CHECK23-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX9]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP30:%.*]] = load double, double* [[ARRAYIDX10]], align 8, !llvm.access.group !25 +// CHECK23-NEXT: [[ADD11:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK23-NEXT: store double [[ADD11]], double* [[ARRAYIDX10]], align 8, !llvm.access.group !25 +// CHECK23-NEXT: [[TMP31:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK23-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP16]], i32 [[TMP31]] +// CHECK23-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX12]], i32 3 +// CHECK23-NEXT: [[TMP32:%.*]] = load double, double* [[ARRAYIDX13]], align 8, !llvm.access.group !25 +// CHECK23-NEXT: [[ADD14:%.*]] = fadd double [[TMP32]], 1.000000e+00 +// CHECK23-NEXT: store double [[ADD14]], double* [[ARRAYIDX13]], align 8, !llvm.access.group !25 +// CHECK23-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP18]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP33:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !25 +// CHECK23-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP33]], 1 +// CHECK23-NEXT: store i64 [[ADD15]], i64* [[X]], align 4, !llvm.access.group !25 +// CHECK23-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP18]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP34:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !25 +// CHECK23-NEXT: [[CONV16:%.*]] = sext i8 [[TMP34]] to i32 +// CHECK23-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV16]], 1 +// CHECK23-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK23-NEXT: store i8 [[CONV18]], i8* [[Y]], align 4, !llvm.access.group !25 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK23-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK23-NEXT: store i32 [[ADD23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK23-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK23-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK23-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]]) -// CHECK23-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK23-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) +// CHECK23-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK23-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 10, i32* [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12523,10 +12811,7 @@ // CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK23-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK23-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK23-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK23-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 @@ -12535,155 +12820,163 @@ // CHECK23-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK23-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* // CHECK23-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK23-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK23-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK23-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 -// CHECK23-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* -// CHECK23-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 -// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, [10 x i32]*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]], [10 x i32]* [[TMP0]]) +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK23-NEXT: store i16 [[TMP6]], i16* [[TMP5]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK23-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV1]], align 1 +// CHECK23-NEXT: store i8 [[TMP8]], i8* [[TMP7]], align 2 +// CHECK23-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK23-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP9]], align 4 +// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[N:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK23-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK23-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 -// CHECK23-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK23-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK23-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* -// CHECK23-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK23-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] -// CHECK23-NEXT: [[SUB4:%.*]] = sub i32 [[SUB]], 1 -// CHECK23-NEXT: [[ADD:%.*]] = add i32 [[SUB4]], 1 +// CHECK23-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK23-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK23-NEXT: store i32 [[TMP4]], i32* [[N]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load i16, i16* [[TMP5]], align 4 +// CHECK23-NEXT: store i16 [[TMP6]], i16* [[AA]], align 2 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK23-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP7]], align 2 +// CHECK23-NEXT: store i8 [[TMP8]], i8* [[AAA]], align 1 +// CHECK23-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP9]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[A]], align 4 +// CHECK23-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, i32* [[N]], align 4 +// CHECK23-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK23-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK23-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] +// CHECK23-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK23-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK23-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 -// CHECK23-NEXT: [[SUB5:%.*]] = sub i32 [[DIV]], 1 -// CHECK23-NEXT: store i32 [[SUB5]], i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK23-NEXT: store i32 [[TMP5]], i32* [[I]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK23-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK23-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK23-NEXT: store i32 [[TMP15]], i32* [[I]], align 4 +// CHECK23-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK23-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK23-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK23: omp.precond.then: // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK23-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK23-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK23-NEXT: [[CMP7:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] -// CHECK23-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK23-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK23-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] +// CHECK23-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK23-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK23-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 -// CHECK23-NEXT: [[ADD8:%.*]] = add i32 [[TMP17]], 1 -// CHECK23-NEXT: [[CMP9:%.*]] = icmp ult i32 [[TMP16]], [[ADD8]] -// CHECK23-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK23-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK23-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 +// CHECK23-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK23-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] +// CHECK23-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !28 -// CHECK23-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK23-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK23-NEXT: [[ADD10:%.*]] = add i32 [[TMP18]], [[MUL]] -// CHECK23-NEXT: store i32 [[ADD10]], i32* [[I6]], align 4, !llvm.access.group !28 -// CHECK23-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !28 -// CHECK23-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK23-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4, !llvm.access.group !28 -// CHECK23-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !28 -// CHECK23-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 -// CHECK23-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 -// CHECK23-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK23-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !28 -// CHECK23-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1, !llvm.access.group !28 -// CHECK23-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 -// CHECK23-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 -// CHECK23-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK23-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1, !llvm.access.group !28 -// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK23-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 -// CHECK23-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK23-NEXT: store i32 [[ADD18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 +// CHECK23-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !28 +// CHECK23-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK23-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK23-NEXT: [[ADD9:%.*]] = add i32 [[TMP28]], [[MUL]] +// CHECK23-NEXT: store i32 [[ADD9]], i32* [[I5]], align 4, !llvm.access.group !28 +// CHECK23-NEXT: [[TMP30:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !28 +// CHECK23-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK23-NEXT: store i32 [[ADD10]], i32* [[A]], align 4, !llvm.access.group !28 +// CHECK23-NEXT: [[TMP31:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !28 +// CHECK23-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 +// CHECK23-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK23-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 +// CHECK23-NEXT: store i16 [[CONV12]], i16* [[AA]], align 2, !llvm.access.group !28 +// CHECK23-NEXT: [[TMP32:%.*]] = load i8, i8* [[AAA]], align 1, !llvm.access.group !28 +// CHECK23-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 +// CHECK23-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 +// CHECK23-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 +// CHECK23-NEXT: store i8 [[CONV15]], i8* [[AAA]], align 1, !llvm.access.group !28 +// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP10]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 +// CHECK23-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK23-NEXT: store i32 [[ADD16]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK23-NEXT: [[ADD19:%.*]] = add i32 [[TMP24]], 1 -// CHECK23-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK23-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK23-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 +// CHECK23-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK23-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK23-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK23-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK23-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: -// CHECK23-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK23-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK23-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK23-NEXT: [[SUB20:%.*]] = sub i32 [[TMP30]], [[TMP31]] -// CHECK23-NEXT: [[SUB21:%.*]] = sub i32 [[SUB20]], 1 -// CHECK23-NEXT: [[ADD22:%.*]] = add i32 [[SUB21]], 1 -// CHECK23-NEXT: [[DIV23:%.*]] = udiv i32 [[ADD22]], 1 -// CHECK23-NEXT: [[MUL24:%.*]] = mul i32 [[DIV23]], 1 -// CHECK23-NEXT: [[ADD25:%.*]] = add i32 [[TMP29]], [[MUL24]] -// CHECK23-NEXT: store i32 [[ADD25]], i32* [[I6]], align 4 +// CHECK23-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK23-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK23-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK23-NEXT: [[SUB18:%.*]] = sub i32 [[TMP40]], [[TMP41]] +// CHECK23-NEXT: [[SUB19:%.*]] = sub i32 [[SUB18]], 1 +// CHECK23-NEXT: [[ADD20:%.*]] = add i32 [[SUB19]], 1 +// CHECK23-NEXT: [[DIV21:%.*]] = udiv i32 [[ADD20]], 1 +// CHECK23-NEXT: [[MUL22:%.*]] = mul i32 [[DIV21]], 1 +// CHECK23-NEXT: [[ADD23:%.*]] = add i32 [[TMP39]], [[MUL22]] +// CHECK23-NEXT: store i32 [[ADD23]], i32* [[I5]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK23: .omp.final.done: // CHECK23-NEXT: br label [[OMP_PRECOND_END]] @@ -12700,8 +12993,7 @@ // CHECK23-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 // CHECK23-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK23-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK23-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK23-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -12713,30 +13005,34 @@ // CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 // CHECK23-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 // CHECK23-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 -// CHECK23-NEXT: [[CONV3:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK23-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: store %struct.S1* [[TMP0]], %struct.S1** [[TMP4]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: store i32 [[TMP1]], i32* [[TMP7]], align 4 +// CHECK23-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK23-NEXT: store i32 [[TMP2]], i32* [[TMP8]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK23-NEXT: store i16* [[TMP3]], i16** [[TMP9]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK23-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK23-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK23-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]], i32 [[TMP7]]) +// CHECK23-NEXT: store i8 [[FROMBOOL]], i8* [[TMP10]], align 4 +// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK23-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 -// CHECK23-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 -// CHECK23-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK23-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12746,119 +13042,126 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK23-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 -// CHECK23-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK23-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK23-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load i16*, i16** [[TMP9]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK23-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP11]], align 4 +// CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 +// CHECK23-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK23-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK23-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 9 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP9]] to i1 -// CHECK23-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK23-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP18:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK23-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK23-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK23: omp_if.then: // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 -// CHECK23-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK23-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK23-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK23-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 +// CHECK23-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK23-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK23-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK23-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !31 -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, i32* [[B_ADDR]], align 4, !llvm.access.group !31 -// CHECK23-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP13]] to double -// CHECK23-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 -// CHECK23-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK23-NEXT: store double [[ADD5]], double* [[A]], align 4, !llvm.access.group !31 -// CHECK23-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK23-NEXT: [[TMP14:%.*]] = load double, double* [[A6]], align 4, !llvm.access.group !31 -// CHECK23-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK23-NEXT: store double [[INC]], double* [[A6]], align 4, !llvm.access.group !31 -// CHECK23-NEXT: [[CONV7:%.*]] = fptosi double [[INC]] to i16 -// CHECK23-NEXT: [[TMP15:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP15]] -// CHECK23-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 -// CHECK23-NEXT: store i16 [[CONV7]], i16* [[ARRAYIDX8]], align 2, !llvm.access.group !31 +// CHECK23-NEXT: [[TMP22:%.*]] = load i32, i32* [[B]], align 4, !llvm.access.group !31 +// CHECK23-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP22]] to double +// CHECK23-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK23-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK23-NEXT: store double [[ADD3]], double* [[A]], align 4, !llvm.access.group !31 +// CHECK23-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP23:%.*]] = load double, double* [[A4]], align 4, !llvm.access.group !31 +// CHECK23-NEXT: [[INC:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// CHECK23-NEXT: store double [[INC]], double* [[A4]], align 4, !llvm.access.group !31 +// CHECK23-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 +// CHECK23-NEXT: [[TMP24:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i32 [[TMP24]] +// CHECK23-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// CHECK23-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2, !llvm.access.group !31 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK23-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK23-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK23-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK23-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK23-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_IF_END:%.*]] // CHECK23: omp_if.else: -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] -// CHECK23: omp.inner.for.cond10: -// CHECK23-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK23-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END26:%.*]] -// CHECK23: omp.inner.for.body12: -// CHECK23-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK23-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] -// CHECK23-NEXT: store i32 [[ADD14]], i32* [[I]], align 4 -// CHECK23-NEXT: [[TMP20:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK23-NEXT: [[CONV15:%.*]] = sitofp i32 [[TMP20]] to double -// CHECK23-NEXT: [[ADD16:%.*]] = fadd double [[CONV15]], 1.500000e+00 -// CHECK23-NEXT: [[A17:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK23-NEXT: store double [[ADD16]], double* [[A17]], align 4 -// CHECK23-NEXT: [[A18:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK23-NEXT: [[TMP21:%.*]] = load double, double* [[A18]], align 4 -// CHECK23-NEXT: [[INC19:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK23-NEXT: store double [[INC19]], double* [[A18]], align 4 -// CHECK23-NEXT: [[CONV20:%.*]] = fptosi double [[INC19]] to i16 -// CHECK23-NEXT: [[TMP22:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK23-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP22]] -// CHECK23-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX21]], i32 1 -// CHECK23-NEXT: store i16 [[CONV20]], i16* [[ARRAYIDX22]], align 2 -// CHECK23-NEXT: br label [[OMP_BODY_CONTINUE23:%.*]] -// CHECK23: omp.body.continue23: -// CHECK23-NEXT: br label [[OMP_INNER_FOR_INC24:%.*]] -// CHECK23: omp.inner.for.inc24: -// CHECK23-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK23-NEXT: store i32 [[ADD25]], i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP34:![0-9]+]] -// CHECK23: omp.inner.for.end26: +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] +// CHECK23: omp.inner.for.cond8: +// CHECK23-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK23-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END24:%.*]] +// CHECK23: omp.inner.for.body10: +// CHECK23-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[MUL11:%.*]] = mul nsw i32 [[TMP28]], 1 +// CHECK23-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK23-NEXT: store i32 [[ADD12]], i32* [[I]], align 4 +// CHECK23-NEXT: [[TMP29:%.*]] = load i32, i32* [[B]], align 4 +// CHECK23-NEXT: [[CONV13:%.*]] = sitofp i32 [[TMP29]] to double +// CHECK23-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.500000e+00 +// CHECK23-NEXT: [[A15:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK23-NEXT: store double [[ADD14]], double* [[A15]], align 4 +// CHECK23-NEXT: [[A16:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP2]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP30:%.*]] = load double, double* [[A16]], align 4 +// CHECK23-NEXT: [[INC17:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK23-NEXT: store double [[INC17]], double* [[A16]], align 4 +// CHECK23-NEXT: [[CONV18:%.*]] = fptosi double [[INC17]] to i16 +// CHECK23-NEXT: [[TMP31:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK23-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, i16* [[TMP10]], i32 [[TMP31]] +// CHECK23-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX19]], i32 1 +// CHECK23-NEXT: store i16 [[CONV18]], i16* [[ARRAYIDX20]], align 2 +// CHECK23-NEXT: br label [[OMP_BODY_CONTINUE21:%.*]] +// CHECK23: omp.body.continue21: +// CHECK23-NEXT: br label [[OMP_INNER_FOR_INC22:%.*]] +// CHECK23: omp.inner.for.inc22: +// CHECK23-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK23-NEXT: store i32 [[ADD23]], i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK23: omp.inner.for.end24: // CHECK23-NEXT: br label [[OMP_IF_END]] // CHECK23: omp_if.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK23-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK23-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]]) +// CHECK23-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK23-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 10, i32* [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12872,32 +13175,32 @@ // CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK23-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK23-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK23-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK23-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK23-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK23-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK23-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* -// CHECK23-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]]) +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK23-NEXT: store i16 [[TMP4]], i16* [[TMP3]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP5]], align 4 +// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 +// CHECK23-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12907,69 +13210,74 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK23-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK23-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK23-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK23-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK23-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK23-NEXT: store i16 [[TMP4]], i16* [[AA]], align 2 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK23-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !36 -// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !36 +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK23-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK23-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !36 -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !36 -// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK23-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !36 -// CHECK23-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !36 -// CHECK23-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 -// CHECK23-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 -// CHECK23-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK23-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !36 -// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !36 -// CHECK23-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK23-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !36 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, i32* [[A]], align 4, !llvm.access.group !36 +// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK23-NEXT: store i32 [[ADD2]], i32* [[A]], align 4, !llvm.access.group !36 +// CHECK23-NEXT: [[TMP16:%.*]] = load i16, i16* [[AA]], align 2, !llvm.access.group !36 +// CHECK23-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK23-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK23-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK23-NEXT: store i16 [[CONV4]], i16* [[AA]], align 2, !llvm.access.group !36 +// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !36 +// CHECK23-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK23-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !36 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK23-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK23-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK23-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK23-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK23-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK23-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK23-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK23-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 10, i32* [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp @@ -143,18 +143,21 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -166,71 +169,73 @@ // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 56087, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 456 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 // CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK1-NEXT: store i32 [[ADD6]], i32* [[J]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], [123 x [456 x i32]]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [456 x i32], [456 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX8]], align 4, !llvm.access.group !4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: store i32 456, i32* [[J]], align 4 @@ -295,18 +300,21 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -318,69 +326,71 @@ // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 56087, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 456 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK3-NEXT: store i32 [[ADD6]], i32* [[J]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], [123 x [456 x i32]]* [[A]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], [456 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], [123 x [456 x i32]]* [[A]], i32 0, i32 [[TMP13]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], [456 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP14]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX7]], align 4, !llvm.access.group !5 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, i32* [[I]], align 4 // CHECK3-NEXT: store i32 456, i32* [[J]], align 4 @@ -672,8 +682,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[M]], i64* [[M_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -684,177 +693,183 @@ // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 4 -// CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[M_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[M_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP1]], i64* [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32* [[TMP2]], i32** [[TMP9]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I13:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J14:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[M]], i64* [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[M_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[M]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 -// CHECK9-NEXT: [[CONV10:%.*]] = sext i32 [[DIV9]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV7]], [[CONV10]] -// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB11]], i64* [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 // CHECK9-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[CMP12:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: store i64 [[TMP17]], i64* [[DOTOMP_UB]], align 8 // CHECK9-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK9-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP16]], i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP24]], i64* [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] -// CHECK9-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP20]], 0 -// CHECK9-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 -// CHECK9-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] -// CHECK9-NEXT: [[CONV20:%.*]] = sext i32 [[MUL19]] to i64 -// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i64 [[TMP19]], [[CONV20]] -// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]] -// CHECK9-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK9-NEXT: store i32 [[CONV23]], i32* [[I13]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP23]], 0 -// CHECK9-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 -// CHECK9-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] -// CHECK9-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 -// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP22]], [[CONV27]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK9-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 -// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] -// CHECK9-NEXT: [[CONV32:%.*]] = sext i32 [[MUL31]] to i64 -// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[DIV28]], [[CONV32]] -// CHECK9-NEXT: [[SUB34:%.*]] = sub nsw i64 [[TMP21]], [[MUL33]] -// CHECK9-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 -// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] -// CHECK9-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 -// CHECK9-NEXT: store i32 [[CONV37]], i32* [[J14]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[I13]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 -// CHECK9-NEXT: [[TMP26:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP1]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[TMP26]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[J14]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i64 [[IDXPROM38]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX39]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK9-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK9-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP27]], [[CONV16]] +// CHECK9-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK9-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK9-NEXT: store i32 [[CONV19]], i32* [[I9]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK9-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK9-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP30]], [[CONV23]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK9-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] +// CHECK9-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK9-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP29]], [[MUL29]] +// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 +// CHECK9-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] +// CHECK9-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK9-NEXT: store i32 [[CONV33]], i32* [[J10]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[I9]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[TMP34:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP8]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[J10]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[IDXPROM34:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK9-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i64 [[IDXPROM34]] +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX35]], align 4, !llvm.access.group !5 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[ADD40:%.*]] = add nsw i64 [[TMP28]], 1 -// CHECK9-NEXT: store i64 [[ADD40]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 [[TMP36]], 1 +// CHECK9-NEXT: store i64 [[ADD36]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK9-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB41:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB37:%.*]] = sub nsw i32 [[TMP41]], 0 +// CHECK9-NEXT: [[DIV38:%.*]] = sdiv i32 [[SUB37]], 1 +// CHECK9-NEXT: [[MUL39:%.*]] = mul nsw i32 [[DIV38]], 1 +// CHECK9-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL39]] +// CHECK9-NEXT: store i32 [[ADD40]], i32* [[I9]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB41:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK9-NEXT: [[DIV42:%.*]] = sdiv i32 [[SUB41]], 1 // CHECK9-NEXT: [[MUL43:%.*]] = mul nsw i32 [[DIV42]], 1 // CHECK9-NEXT: [[ADD44:%.*]] = add nsw i32 0, [[MUL43]] -// CHECK9-NEXT: store i32 [[ADD44]], i32* [[I13]], align 4 -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB45:%.*]] = sub nsw i32 [[TMP34]], 0 -// CHECK9-NEXT: [[DIV46:%.*]] = sdiv i32 [[SUB45]], 1 -// CHECK9-NEXT: [[MUL47:%.*]] = mul nsw i32 [[DIV46]], 1 -// CHECK9-NEXT: [[ADD48:%.*]] = add nsw i32 0, [[MUL47]] -// CHECK9-NEXT: store i32 [[ADD48]], i32* [[J14]], align 4 +// CHECK9-NEXT: store i32 [[ADD44]], i32* [[J10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -898,18 +913,21 @@ // CHECK9-SAME: ([10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x [2 x i32]]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [10 x [2 x i32]]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [10 x [2 x i32]]* [[TMP0]], [10 x [2 x i32]]** [[TMP1]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -921,70 +939,72 @@ // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 19, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 2 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 // CHECK9-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK9-NEXT: store i32 [[ADD6]], i32* [[J]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP2]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP14]] to i64 // CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX8]], align 4, !llvm.access.group !11 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, i32* [[I]], align 4 // CHECK9-NEXT: store i32 2, i32* [[J]], align 4 @@ -1133,8 +1153,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -1143,171 +1162,181 @@ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[M_CASTED]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[M_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[M_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], i32* [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32* [[TMP2]], i32** [[TMP9]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[M]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 // CHECK11-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP17]], i64* [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP16]], i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP24]], i64* [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] -// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP20]], 0 -// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK11-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] -// CHECK11-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 -// CHECK11-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP19]], [[CONV18]] -// CHECK11-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] -// CHECK11-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK11-NEXT: store i32 [[CONV21]], i32* [[I11]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP23]], 0 -// CHECK11-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 -// CHECK11-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] -// CHECK11-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 -// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP22]], [[CONV25]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 -// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] -// CHECK11-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 -// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] -// CHECK11-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP21]], [[MUL31]] -// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 -// CHECK11-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] -// CHECK11-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 -// CHECK11-NEXT: store i32 [[CONV35]], i32* [[J12]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[I11]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP26:%.*]] = mul nsw i32 [[TMP25]], [[TMP1]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[J12]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i32 [[TMP27]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX36]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK11-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK11-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK11-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK11-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP27]], [[CONV16]] +// CHECK11-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK11-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK11-NEXT: store i32 [[CONV19]], i32* [[I9]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK11-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK11-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK11-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK11-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP30]], [[CONV23]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK11-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] +// CHECK11-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK11-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP29]], [[MUL29]] +// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 +// CHECK11-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] +// CHECK11-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK11-NEXT: store i32 [[CONV33]], i32* [[J10]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[I9]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP34:%.*]] = mul nsw i32 [[TMP33]], [[TMP8]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[J10]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i32 [[TMP35]] +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX34]], align 4, !llvm.access.group !6 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[ADD37:%.*]] = add nsw i64 [[TMP28]], 1 -// CHECK11-NEXT: store i64 [[ADD37]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[ADD35:%.*]] = add nsw i64 [[TMP36]], 1 +// CHECK11-NEXT: store i64 [[ADD35]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK11-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB38:%.*]] = sub nsw i32 [[TMP33]], 0 -// CHECK11-NEXT: [[DIV39:%.*]] = sdiv i32 [[SUB38]], 1 -// CHECK11-NEXT: [[MUL40:%.*]] = mul nsw i32 [[DIV39]], 1 -// CHECK11-NEXT: [[ADD41:%.*]] = add nsw i32 0, [[MUL40]] -// CHECK11-NEXT: store i32 [[ADD41]], i32* [[I11]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB42:%.*]] = sub nsw i32 [[TMP34]], 0 -// CHECK11-NEXT: [[DIV43:%.*]] = sdiv i32 [[SUB42]], 1 -// CHECK11-NEXT: [[MUL44:%.*]] = mul nsw i32 [[DIV43]], 1 -// CHECK11-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL44]] -// CHECK11-NEXT: store i32 [[ADD45]], i32* [[J12]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB36:%.*]] = sub nsw i32 [[TMP41]], 0 +// CHECK11-NEXT: [[DIV37:%.*]] = sdiv i32 [[SUB36]], 1 +// CHECK11-NEXT: [[MUL38:%.*]] = mul nsw i32 [[DIV37]], 1 +// CHECK11-NEXT: [[ADD39:%.*]] = add nsw i32 0, [[MUL38]] +// CHECK11-NEXT: store i32 [[ADD39]], i32* [[I9]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB40:%.*]] = sub nsw i32 [[TMP42]], 0 +// CHECK11-NEXT: [[DIV41:%.*]] = sdiv i32 [[SUB40]], 1 +// CHECK11-NEXT: [[MUL42:%.*]] = mul nsw i32 [[DIV41]], 1 +// CHECK11-NEXT: [[ADD43:%.*]] = add nsw i32 0, [[MUL42]] +// CHECK11-NEXT: store i32 [[ADD43]], i32* [[J10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: br label [[OMP_PRECOND_END]] @@ -1351,18 +1380,21 @@ // CHECK11-SAME: ([10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x [2 x i32]]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [10 x [2 x i32]]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [10 x [2 x i32]]* [[TMP0]], [10 x [2 x i32]]** [[TMP1]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1374,68 +1406,70 @@ // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 19, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 2 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 // CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK11-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK11-NEXT: store i32 [[ADD6]], i32* [[J]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP12]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP2]], i32 0, i32 [[TMP13]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP14]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX7]], align 4, !llvm.access.group !12 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK11-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, i32* [[I]], align 4 // CHECK11-NEXT: store i32 2, i32* [[J]], align 4 diff --git a/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp @@ -215,18 +215,21 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -236,59 +239,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -300,18 +305,21 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -321,59 +329,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -385,18 +395,21 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -406,76 +419,78 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -583,18 +598,21 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -604,58 +622,60 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !7 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !7 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP9]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !7 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -667,18 +687,21 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -688,58 +711,60 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP9]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -751,18 +776,21 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -772,75 +800,77 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP13]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1338,29 +1368,31 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1373,82 +1405,85 @@ // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK9-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP22]], 0 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK9-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -1466,29 +1501,31 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1501,82 +1538,85 @@ // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK9-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP22]], 0 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK9-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -1595,8 +1635,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 @@ -1605,139 +1644,144 @@ // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK9-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK9-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !18 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !18 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK9-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] +// CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] +// CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK9-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK9-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP29]], 0 -// CHECK9-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 -// CHECK9-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 -// CHECK9-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] -// CHECK9-NEXT: store i32 [[ADD15]], i32* [[I5]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK9-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1 +// CHECK9-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1 +// CHECK9-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] +// CHECK9-NEXT: store i32 [[ADD14]], i32* [[I4]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -1824,18 +1868,21 @@ // CHECK9-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1845,58 +1892,60 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK9-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1908,18 +1957,21 @@ // CHECK9-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1929,58 +1981,60 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK9-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1992,18 +2046,21 @@ // CHECK9-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2013,75 +2070,77 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 10) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 10) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK9-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2330,27 +2389,30 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2363,80 +2425,84 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP16]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 [[TMP21]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK11-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP22]], 0 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -2454,27 +2520,30 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2487,80 +2556,84 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !16 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP16]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 [[TMP21]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK11-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP22]], 0 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -2579,33 +2652,35 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2618,99 +2693,105 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP19]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 [[TMP26]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP29]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1 // CHECK11-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1 // CHECK11-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] @@ -2801,18 +2882,21 @@ // CHECK11-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2822,57 +2906,59 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i32 0, i32 [[TMP11]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2884,18 +2970,21 @@ // CHECK11-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2905,57 +2994,59 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i32 0, i32 [[TMP11]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2967,18 +3058,21 @@ // CHECK11-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2988,74 +3082,76 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 10) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 10) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !28 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !28 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i32 0, i32 [[TMP13]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp @@ -333,8 +333,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 @@ -345,154 +344,160 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i64 [[TMP4]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP2]], i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S]* [[TMP6]] to %struct.S* +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP8]], %struct.St* noundef [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i64 0, i64 [[IDXPROM8]] -// CHECK1-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX9]] to i8* -// CHECK1-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR5]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false), !llvm.access.group !5 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP23]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast %struct.S* [[ARRAYIDX5]] to i8* +// CHECK1-NEXT: [[TMP27:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 4, i1 false), !llvm.access.group !5 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP28]] +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[SIVAR]], align 4, !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK1-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN12]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP27]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP35]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done13: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done9: // CHECK1-NEXT: ret void // // @@ -691,7 +696,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 @@ -701,24 +706,27 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK1-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i64 [[TMP4]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP5]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x %struct.S.0]* [[TMP1]], [2 x %struct.S.0]** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP7]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR5]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -726,125 +734,128 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca %struct.S.0*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP7]], align 8 +// CHECK1-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP9]], i8* align 4 [[TMP10]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [2 x %struct.S.0]* [[TMP6]] to %struct.S.0* +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP12]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP11]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.St* noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: [[TMP13:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP13]], %struct.St* noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP16]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP18:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i64 0, i64 [[IDXPROM9]] -// CHECK1-NEXT: [[TMP20:%.*]] = bitcast %struct.S.0* [[ARRAYIDX10]] to i8* -// CHECK1-NEXT: [[TMP21:%.*]] = bitcast %struct.S.0* [[TMP18]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false), !llvm.access.group !11 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP24:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP4]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast %struct.S.0* [[ARRAYIDX7]] to i8* +// CHECK1-NEXT: [[TMP27:%.*]] = bitcast %struct.S.0* [[TMP24]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 4, i1 false), !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN12]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN9]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP27]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP33]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done13: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done10: // CHECK1-NEXT: ret void // // @@ -1135,8 +1146,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 @@ -1145,148 +1155,158 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32 [[TMP4]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP2]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC1]] to i8* -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK3-NEXT: [[TMP12:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S]* [[TMP6]] to %struct.S* +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP8]], %struct.St* noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC1]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 [[TMP17]] -// CHECK3-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* -// CHECK3-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR4]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false), !llvm.access.group !6 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP24]] +// CHECK3-NEXT: store i32 [[TMP23]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP25]] +// CHECK3-NEXT: [[TMP26:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* +// CHECK3-NEXT: [[TMP27:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 4, i1 false), !llvm.access.group !6 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP29]], [[TMP28]] +// CHECK3-NEXT: store i32 [[ADD5]], i32* [[SIVAR]], align 4, !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK3-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP27]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP35]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done11: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done8: // CHECK3-NEXT: ret void // // @@ -1484,7 +1504,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 @@ -1493,23 +1513,27 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 // CHECK3-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32 [[TMP4]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP5]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [2 x %struct.S.0]* [[TMP1]], [2 x %struct.S.0]** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP7]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR5]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1517,122 +1541,126 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca %struct.S.0*, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP7]], align 4 +// CHECK3-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: [[TMP9:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK3-NEXT: [[TMP10:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 [[TMP10]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = bitcast [2 x %struct.S.0]* [[TMP6]] to %struct.S.0* +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP12]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP11]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.St* noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: [[TMP13:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP13]], %struct.St* noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP17]] -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP18:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 [[TMP19]] -// CHECK3-NEXT: [[TMP20:%.*]] = bitcast %struct.S.0* [[ARRAYIDX9]] to i8* -// CHECK3-NEXT: [[TMP21:%.*]] = bitcast %struct.S.0* [[TMP18]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false), !llvm.access.group !12 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP23]] +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP24:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP4]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP25]] +// CHECK3-NEXT: [[TMP26:%.*]] = bitcast %struct.S.0* [[ARRAYIDX6]] to i8* +// CHECK3-NEXT: [[TMP27:%.*]] = bitcast %struct.S.0* [[TMP24]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 4, i1 false), !llvm.access.group !12 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK3-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN11]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN8]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP27]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP33]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done12: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done9: // CHECK3-NEXT: ret void // // @@ -2509,9 +2537,7 @@ // CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK9-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 // CHECK9-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 @@ -2519,34 +2545,31 @@ // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load volatile i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* @g, align 4 +// CHECK9-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]]) +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR6:[0-9]+]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR6:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK9-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 @@ -2555,71 +2578,76 @@ // CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[G_ADDR]] to i32* -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[G]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[G1]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], i32* [[SIVAR]], align 4 +// CHECK9-NEXT: store i32* [[G1]], i32** [[TMP]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] -// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: store i32 1, i32* [[CONV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !4 -// CHECK9-NEXT: store volatile i32 1, i32* [[TMP8]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: store i32 2, i32* [[CONV2]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP9]], align 8, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !4 -// CHECK9-NEXT: store i32* [[TMP11]], i32** [[TMP10]], align 8, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store i32* [[CONV2]], i32** [[TMP12]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: store i32 1, i32* [[G]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: store volatile i32 1, i32* [[TMP15]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: store i32 2, i32* [[SIVAR]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[G]], i32** [[TMP16]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: store i32* [[TMP18]], i32** [[TMP17]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[TMP19]], align 8, !llvm.access.group !4 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group !4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK9-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp @@ -160,10 +160,7 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], i64* [[SVAR_ADDR]], align 8 @@ -173,141 +170,144 @@ // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SFVAR_ADDR]] to float* // CHECK1-NEXT: store double* [[CONV1]], double** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double, double* [[CONV]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[G_CASTED]] to double* -// CHECK1-NEXT: store double [[TMP0]], double* [[CONV4]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load volatile double, double* [[TMP2]], align 8 -// CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[G1_CASTED]] to double* -// CHECK1-NEXT: store double [[TMP3]], double* [[CONV5]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV6]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load float, float* [[CONV3]], align 4 -// CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[SFVAR_CASTED]] to float* -// CHECK1-NEXT: store float [[TMP7]], float* [[CONV7]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load double, double* [[CONV]], align 8 +// CHECK1-NEXT: store double [[TMP1]], double* [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load volatile double, double* [[TMP3]], align 8 +// CHECK1-NEXT: store double [[TMP4]], double* [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV2]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float, float* [[CONV3]], align 4 +// CHECK1-NEXT: store float [[TMP8]], float* [[TMP7]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SVAR:%.*]], i64 noundef [[SFVAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G5:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G16:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP7:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR9:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G2:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G13:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SVAR]], i64* [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SFVAR]], i64* [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[G_ADDR]] to double* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to double* -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SFVAR_ADDR]] to float* -// CHECK1-NEXT: store double* [[CONV1]], double** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load double, double* [[TMP1]], align 8 +// CHECK1-NEXT: store double [[TMP2]], double* [[G]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double, double* [[TMP3]], align 8 +// CHECK1-NEXT: store double [[TMP4]], double* [[G1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float, float* [[TMP7]], align 4 +// CHECK1-NEXT: store float [[TMP8]], float* [[SFVAR]], align 4 +// CHECK1-NEXT: store double* [[G1]], double** [[TMP]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: store double* [[G16]], double** [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[G13]], double** [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: store double 1.000000e+00, double* [[G5]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP9:%.*]] = load double*, double** [[_TMP7]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP9]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: store i32 3, i32* [[SVAR8]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: store float 4.000000e+00, float* [[SFVAR9]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double* [[G5]], double** [[TMP10]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP12:%.*]] = load double*, double** [[_TMP7]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: store double* [[TMP12]], double** [[TMP11]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store i32* [[SVAR8]], i32** [[TMP13]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store float* [[SFVAR9]], float** [[TMP14]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: store double 1.000000e+00, double* [[G2]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP18:%.*]] = load double*, double** [[_TMP4]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP18]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: store i32 3, i32* [[SVAR5]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: store float 4.000000e+00, float* [[SFVAR6]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store double* [[G2]], double** [[TMP19]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load double*, double** [[_TMP4]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: store double* [[TMP21]], double** [[TMP20]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[SVAR5]], i32** [[TMP22]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[SFVAR6]], float** [[TMP23]], align 8, !llvm.access.group !4 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP20:%.*]] = load double, double* [[G5]], align 8 -// CHECK1-NEXT: store volatile double [[TMP20]], double* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load double*, double** [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load double, double* [[TMP21]], align 8 -// CHECK1-NEXT: store volatile double [[TMP22]], double* [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK1-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load float, float* [[SFVAR9]], align 4 -// CHECK1-NEXT: store float [[TMP24]], float* [[CONV3]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load double, double* [[G2]], align 8 +// CHECK1-NEXT: store volatile double [[TMP29]], double* [[G]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load double*, double** [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load double, double* [[TMP30]], align 8 +// CHECK1-NEXT: store volatile double [[TMP31]], double* [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[SVAR5]], align 4 +// CHECK1-NEXT: store i32 [[TMP32]], i32* [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load float, float* [[SFVAR6]], align 4 +// CHECK1-NEXT: store float [[TMP33]], float* [[SFVAR]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -346,8 +346,7 @@ // CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 // CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], i32* [[SVAR_ADDR]], align 4 @@ -356,27 +355,29 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[SFVAR_ADDR]] to float* // CHECK3-NEXT: store double* [[TMP1]], double** [[TMP]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load float, float* [[CONV]], align 4 -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[SFVAR_CASTED]] to float* -// CHECK3-NEXT: store float [[TMP5]], float* [[CONV1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[SFVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, double*, double*, i32, i32)* @.omp_outlined. to void (i32*, i32*, ...)*), double* [[TMP0]], double* [[TMP2]], i32 [[TMP4]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[TMP0]], double** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP]], align 4 +// CHECK3-NEXT: store double* [[TMP4]], double** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[SVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float, float* [[CONV]], align 4 +// CHECK3-NEXT: store float [[TMP8]], float* [[TMP7]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], i32 noundef [[SVAR:%.*]], i32 noundef [[SFVAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -384,103 +385,108 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[SVAR3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR4:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 -// CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SVAR]], i32* [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SFVAR]], i32* [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[SFVAR_ADDR]] to float* -// CHECK3-NEXT: store double* [[TMP1]], double** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float, float* [[TMP7]], align 4 +// CHECK3-NEXT: store float [[TMP8]], float* [[SFVAR]], align 4 +// CHECK3-NEXT: store double* [[TMP4]], double** [[TMP]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP]], align 4 -// CHECK3-NEXT: store double* [[G13]], double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load double*, double** [[TMP]], align 4 +// CHECK3-NEXT: store double* [[G1]], double** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: store double 1.000000e+00, double* [[G2]], align 8, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP11:%.*]] = load double*, double** [[_TMP4]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP11]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: store i32 3, i32* [[SVAR5]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: store float 4.000000e+00, float* [[SFVAR6]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double* [[G2]], double** [[TMP12]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP14:%.*]] = load double*, double** [[_TMP4]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: store double* [[TMP14]], double** [[TMP13]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store i32* [[SVAR5]], i32** [[TMP15]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store float* [[SFVAR6]], float** [[TMP16]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store double 1.000000e+00, double* [[G]], align 8, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP18:%.*]] = load double*, double** [[_TMP2]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP18]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store i32 3, i32* [[SVAR3]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store float 4.000000e+00, float* [[SFVAR4]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G]], double** [[TMP19]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load double*, double** [[_TMP2]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store double* [[TMP21]], double** [[TMP20]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SVAR3]], i32** [[TMP22]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[SFVAR4]], float** [[TMP23]], align 4, !llvm.access.group !5 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !5 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK3-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP22:%.*]] = load double, double* [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP22]], double* [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP23:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load double, double* [[TMP23]], align 4 -// CHECK3-NEXT: store volatile double [[TMP24]], double* [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP25]], i32* [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load float, float* [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP26]], float* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load double, double* [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP29]], double* [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP30:%.*]] = load double*, double** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load double, double* [[TMP30]], align 4 +// CHECK3-NEXT: store volatile double [[TMP31]], double* [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[SVAR3]], align 4 +// CHECK3-NEXT: store i32 [[TMP32]], i32* [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load float, float* [[SFVAR4]], align 4 +// CHECK3-NEXT: store float [[TMP33]], float* [[SFVAR]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -669,8 +675,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 @@ -682,61 +687,67 @@ // CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK9-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[CONV3]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i64 [[TMP4]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP5]], i64 [[TMP7]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SVAR:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca %struct.S*, align 8 +// CHECK9-NEXT: [[SVAR4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SVAR]], i64* [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* -// CHECK9-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 8 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[SVAR]], align 4 +// CHECK9-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -746,115 +757,115 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store %struct.S* [[VAR6]], %struct.S** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK9-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP12]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP14:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK9-NEXT: [[TMP16:%.*]] = bitcast %struct.S* [[ARRAYIDX11]] to i8* -// CHECK9-NEXT: [[TMP17:%.*]] = bitcast %struct.S* [[TMP14]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP16]], i8* align 4 [[TMP17]], i64 4, i1 false), !llvm.access.group !5 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP20]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP22:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK9-NEXT: [[TMP24:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* +// CHECK9-NEXT: [[TMP25:%.*]] = bitcast %struct.S* [[TMP22]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP24]], i8* align 4 [[TMP25]], i64 4, i1 false), !llvm.access.group !5 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK9-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP25]], i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK9-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP1]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP28:%.*]] = bitcast [2 x %struct.S]* [[S_ARR5]] to %struct.S* -// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN13]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN13]], [[TMP29]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[T_VAR2]], align 4 +// CHECK9-NEXT: store i32 [[TMP33]], i32* [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK9-NEXT: [[TMP35:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP36:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN9]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN9]], [[TMP37]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP28]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN13]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[TMP30:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK9-NEXT: [[TMP31:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP36]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[TMP38:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK9-NEXT: [[TMP39:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP29]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done14: -// CHECK9-NEXT: [[TMP32:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[TMP3]] to i8* -// CHECK9-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[TMP32]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK9-NEXT: store i32 [[TMP35]], i32* [[CONV1]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP37]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done10: +// CHECK9-NEXT: [[TMP40:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP41:%.*]] = bitcast %struct.S* [[TMP11]] to i8* +// CHECK9-NEXT: [[TMP42:%.*]] = bitcast %struct.S* [[TMP40]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP41]], i8* align 4 [[TMP42]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, i32* [[SVAR4]], align 4 +// CHECK9-NEXT: store i32 [[TMP43]], i32* [[SVAR]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR5]] -// CHECK9-NEXT: [[ARRAY_BEGIN15:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN15]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK9-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN11]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP36]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP44]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN15]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE16:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done16: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done12: // CHECK9-NEXT: ret void // // @@ -1022,7 +1033,7 @@ // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 @@ -1032,24 +1043,27 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK9-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i64 [[TMP4]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP5]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [2 x %struct.S.0]* [[TMP1]], [2 x %struct.S.0]** [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP7]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1058,27 +1072,30 @@ // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca %struct.S.0*, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP7]], align 8 +// CHECK9-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -1088,113 +1105,113 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK9-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK9-NEXT: [[TMP9:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP12]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP14:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 8, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i64 0, i64 [[IDXPROM8]] -// CHECK9-NEXT: [[TMP16:%.*]] = bitcast %struct.S.0* [[ARRAYIDX9]] to i8* -// CHECK9-NEXT: [[TMP17:%.*]] = bitcast %struct.S.0* [[TMP14]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP16]], i8* align 4 [[TMP17]], i64 4, i1 false), !llvm.access.group !11 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 8, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX6]] to i8* +// CHECK9-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false), !llvm.access.group !11 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK9-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK9-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP25]], i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK9-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP1]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP28:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR4]] to %struct.S.0* -// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN11]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN11]], [[TMP29]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[T_VAR2]], align 4 +// CHECK9-NEXT: store i32 [[TMP31]], i32* [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK9-NEXT: [[TMP33:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP34:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR]] to %struct.S.0* +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN8]], [[TMP35]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP28]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[TMP30:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK9-NEXT: [[TMP31:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP34]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[TMP36:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK9-NEXT: [[TMP37:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP36]], i8* align 4 [[TMP37]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP29]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done12: -// CHECK9-NEXT: [[TMP32:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP33:%.*]] = bitcast %struct.S.0* [[TMP3]] to i8* -// CHECK9-NEXT: [[TMP34:%.*]] = bitcast %struct.S.0* [[TMP32]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP35]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP38:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = bitcast %struct.S.0* [[TMP9]] to i8* +// CHECK9-NEXT: [[TMP40:%.*]] = bitcast %struct.S.0* [[TMP38]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP39]], i8* align 4 [[TMP40]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]] -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN13]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP35]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP41]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done14: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -1387,8 +1404,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 -// CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 @@ -1398,27 +1414,31 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 // CHECK11-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[SVAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32 [[TMP4]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP5]], i32 [[TMP7]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[SVAR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SVAR:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1427,28 +1447,34 @@ // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca %struct.S*, align 4 -// CHECK11-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca %struct.S*, align 4 +// CHECK11-NEXT: [[SVAR4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[SVAR]], i32* [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[SVAR]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1458,113 +1484,113 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store %struct.S* [[VAR5]], %struct.S** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK11-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC3]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: store i32 [[TMP12]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP14:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 [[TMP15]] -// CHECK11-NEXT: [[TMP16:%.*]] = bitcast %struct.S* [[ARRAYIDX9]] to i8* -// CHECK11-NEXT: [[TMP17:%.*]] = bitcast %struct.S* [[TMP14]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP16]], i8* align 4 [[TMP17]], i32 4, i1 false), !llvm.access.group !6 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP21]] +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP22:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP23]] +// CHECK11-NEXT: [[TMP24:%.*]] = bitcast %struct.S* [[ARRAYIDX6]] to i8* +// CHECK11-NEXT: [[TMP25:%.*]] = bitcast %struct.S* [[TMP22]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP24]], i8* align 4 [[TMP25]], i32 4, i1 false), !llvm.access.group !6 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK11-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK11-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP25]], i32* [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK11-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP1]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP28:%.*]] = bitcast [2 x %struct.S]* [[S_ARR4]] to %struct.S* -// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN11]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN11]], [[TMP29]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[T_VAR2]], align 4 +// CHECK11-NEXT: store i32 [[TMP33]], i32* [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK11-NEXT: [[TMP35:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP36:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN8]], [[TMP37]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP28]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[TMP30:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK11-NEXT: [[TMP31:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP36]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[TMP38:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK11-NEXT: [[TMP39:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP29]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done12: -// CHECK11-NEXT: [[TMP32:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[TMP3]] to i8* -// CHECK11-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[TMP32]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[SVAR7]], align 4 -// CHECK11-NEXT: store i32 [[TMP35]], i32* [[SVAR_ADDR]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP37]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done9: +// CHECK11-NEXT: [[TMP40:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = bitcast %struct.S* [[TMP11]] to i8* +// CHECK11-NEXT: [[TMP42:%.*]] = bitcast %struct.S* [[TMP40]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP41]], i8* align 4 [[TMP42]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, i32* [[SVAR4]], align 4 +// CHECK11-NEXT: store i32 [[TMP43]], i32* [[SVAR]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP36]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP44]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done11: // CHECK11-NEXT: ret void // // @@ -1731,7 +1757,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 @@ -1740,23 +1766,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 // CHECK11-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32 [[TMP4]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP5]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x %struct.S.0]* [[TMP1]], [2 x %struct.S.0]** [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP7]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1765,26 +1795,30 @@ // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca %struct.S.0*, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP7]], align 4 +// CHECK11-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1794,111 +1828,111 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK11-NEXT: [[TMP9:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC3]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: store i32 [[TMP12]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP14:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 [[TMP15]] -// CHECK11-NEXT: [[TMP16:%.*]] = bitcast %struct.S.0* [[ARRAYIDX8]] to i8* -// CHECK11-NEXT: [[TMP17:%.*]] = bitcast %struct.S.0* [[TMP14]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP16]], i8* align 4 [[TMP17]], i32 4, i1 false), !llvm.access.group !12 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP19]] +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP21]] +// CHECK11-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX5]] to i8* +// CHECK11-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false), !llvm.access.group !12 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK11-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP25]], i32* [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK11-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP1]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP28:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR4]] to %struct.S.0* -// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN10]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN10]], [[TMP29]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[T_VAR2]], align 4 +// CHECK11-NEXT: store i32 [[TMP31]], i32* [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK11-NEXT: [[TMP33:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP34:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR]] to %struct.S.0* +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN7]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN7]], [[TMP35]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP28]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[TMP30:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK11-NEXT: [[TMP31:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP34]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[TMP36:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK11-NEXT: [[TMP37:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 [[TMP37]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP29]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done11: -// CHECK11-NEXT: [[TMP32:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = bitcast %struct.S.0* [[TMP3]] to i8* -// CHECK11-NEXT: [[TMP34:%.*]] = bitcast %struct.S.0* [[TMP32]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP35]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done8: +// CHECK11-NEXT: [[TMP38:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = bitcast %struct.S.0* [[TMP9]] to i8* +// CHECK11-NEXT: [[TMP40:%.*]] = bitcast %struct.S.0* [[TMP38]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP39]], i8* align 4 [[TMP40]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK11-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN9]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP35]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP41]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done10: // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp @@ -253,15 +253,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l91 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -276,6 +278,8 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 @@ -291,77 +295,77 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM2]] -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.S* [[ARRAYIDX3]] to i8* -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 4, i1 false), !llvm.access.group !5 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[ARRAYIDX3]] to i8* +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 4, i1 false), !llvm.access.group !5 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP14]] // CHECK1-NEXT: store i32 [[ADD4]], i32* [[SIVAR]], align 4, !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i64 2 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP21]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -441,15 +445,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK1-SAME: () #[[ATTR4]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 @@ -465,6 +471,8 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 @@ -482,74 +490,74 @@ // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 8, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP11:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP12]] to i64 // CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM4]] -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast %struct.S.0* [[ARRAYIDX5]] to i8* -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[TMP10]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 4, i1 false), !llvm.access.group !11 +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[ARRAYIDX5]] to i8* +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[TMP11]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 4, i1 false), !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]]) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN7]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] @@ -755,15 +763,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l91 // CHECK3-SAME: () #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -778,6 +788,8 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 @@ -793,75 +805,75 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP10]] -// CHECK3-NEXT: [[TMP11:%.*]] = bitcast %struct.S* [[ARRAYIDX2]] to i8* -// CHECK3-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 4, i1 false), !llvm.access.group !6 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP10]] +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[ARRAYIDX2]] to i8* +// CHECK3-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 4, i1 false), !llvm.access.group !6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], [[TMP14]] // CHECK3-NEXT: store i32 [[ADD3]], i32* [[SIVAR]], align 4, !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN5]], i32 2 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN5]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP21]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] @@ -941,15 +953,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK3-SAME: () #[[ATTR4]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 @@ -965,6 +979,8 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 @@ -982,72 +998,72 @@ // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK3-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: [[TMP12:%.*]] = bitcast %struct.S.0* [[ARRAYIDX4]] to i8* -// CHECK3-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[TMP10]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 4, i1 false), !llvm.access.group !12 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP10]] +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP11:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[ARRAYIDX4]] to i8* +// CHECK3-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[TMP11]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 4, i1 false), !llvm.access.group !12 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]]) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN6]], i32 2 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -2007,15 +2023,17 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l74 // CHECK9-SAME: () #[[ATTR5:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR6:[0-9]+]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR6:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 @@ -2031,66 +2049,68 @@ // CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32* undef, i32** [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK9-NEXT: store i32* [[G1]], i32** [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4 // CHECK9-NEXT: store i32 1, i32* [[G]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[_TMP2]], align 8, !llvm.access.group !4 -// CHECK9-NEXT: store volatile i32 1, i32* [[TMP8]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[_TMP2]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: store volatile i32 1, i32* [[TMP9]], align 4, !llvm.access.group !4 // CHECK9-NEXT: store i32 2, i32* [[SIVAR]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store i32* [[G]], i32** [[TMP9]], align 8, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[_TMP2]], align 8, !llvm.access.group !4 -// CHECK9-NEXT: store i32* [[TMP11]], i32** [[TMP10]], align 8, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[TMP12]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[G]], i32** [[TMP10]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[_TMP2]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: store i32* [[TMP12]], i32** [[TMP11]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[TMP13]], align 8, !llvm.access.group !4 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group !4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp @@ -113,19 +113,22 @@ // CHECK1-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[TMP0]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -136,83 +139,85 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[SIVAR1]], align 4 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[SIVAR1]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[SIVAR1]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[SIVAR]], align 4, !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i32* [[SIVAR1]] to i8* -// CHECK1-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP18]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP22]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -279,19 +284,22 @@ // CHECK1-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[TMP0]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -302,83 +310,85 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[T_VAR1]], align 4 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR1]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[T_VAR1]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[T_VAR]], align 4, !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i32* [[T_VAR1]] to i8* -// CHECK1-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i32* [[T_VAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP18]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP22]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -450,19 +460,22 @@ // CHECK3-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[TMP0]], i32** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -473,83 +486,85 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[SIVAR1]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[SIVAR1]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[SIVAR1]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[SIVAR]], align 4, !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = bitcast i32* [[SIVAR1]] to i8* -// CHECK3-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK3-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i32 4, i8* [[TMP18]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP22]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -616,19 +631,22 @@ // CHECK3-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[TMP0]], i32** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -639,83 +657,85 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[T_VAR1]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR1]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[T_VAR1]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[T_VAR]], align 4, !llvm.access.group !12 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = bitcast i32* [[T_VAR1]] to i8* -// CHECK3-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 4, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = bitcast i32* [[T_VAR]] to i8* +// CHECK3-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 4, i8* [[TMP18]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP22]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -966,19 +986,22 @@ // CHECK9-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[TMP0]], i32** [[TMP1]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -990,86 +1013,88 @@ // CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[SIVAR1]], align 4 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[SIVAR]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9: omp.inner.for.body: +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[SIVAR1]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[SIVAR1]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store i32* [[SIVAR1]], i32** [[TMP11]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK9-NEXT: store i32 [[ADD2]], i32* [[SIVAR]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[TMP13]], align 8, !llvm.access.group !4 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]), !llvm.access.group !4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK9-NEXT: [[TMP16:%.*]] = bitcast i32* [[SIVAR1]] to i8* -// CHECK9-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP17:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK9-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP17]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP18:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK9-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK9-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP19]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK9-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: store i32 [[ADD4]], i32* [[TMP2]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.case2: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP21]] monotonic, align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP23]] monotonic, align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.default: // CHECK9-NEXT: ret void diff --git a/clang/test/OpenMP/target_teams_map_codegen.cpp b/clang/test/OpenMP/target_teams_map_codegen.cpp --- a/clang/test/OpenMP/target_teams_map_codegen.cpp +++ b/clang/test/OpenMP/target_teams_map_codegen.cpp @@ -115,19 +115,23 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14mapWithPrivatev_l27 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -172,37 +176,39 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[X_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[Y_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK1-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 8 // CHECK1-NEXT: store i32* [[Y]], i32** [[Y_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[X_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[Y_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[X_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[X_CASTED]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[Y_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[Y_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP5]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[X:%.*]], i64 noundef [[Y:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[X]], i64* [[X_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[Y]], i64* [[Y_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[X_ADDR]] to i32* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[Y_ADDR]] to i32* +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[X]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[Y]], align 4 // CHECK1-NEXT: ret void // // @@ -247,63 +253,69 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 8 // CHECK1-NEXT: store i32* [[Y]], i32** [[Y_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[X_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[Y_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* [[TMP0]], i32* [[TMP1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[TMP0]], i32** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[TMP1]], i32** [[TMP3]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[X:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[Y:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[X1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[Y2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK1-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[Y]], i32** [[Y_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[X_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[Y_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[X1]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[Y2]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i32* [[X1]] to i8* -// CHECK1-NEXT: store i8* [[TMP3]], i8** [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i32* [[Y2]] to i8* -// CHECK1-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 2, i64 16, i8* [[TMP8]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[X]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[Y]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i32* [[X]] to i8* +// CHECK1-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i32* [[Y]] to i8* +// CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 2, i64 16, i8* [[TMP11]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP12]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[X1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[Y2]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[X]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[Y]], align 4 +// CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD1]], i32* [[TMP4]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[X1]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP14]] monotonic, align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[Y2]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = atomicrmw add i32* [[TMP1]], i32 [[TMP16]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[X]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP17]] monotonic, align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[Y]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = atomicrmw add i32* [[TMP4]], i32 [[TMP19]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -374,27 +386,30 @@ // CHECK1-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[X_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK1-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[X_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[X_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[X_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP2]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[X:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK1-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[X]], i64* [[X_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[X_ADDR]] to i32* +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[X]], align 4 // CHECK1-NEXT: ret void // // @@ -429,27 +444,30 @@ // CHECK1-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[X_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK1-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[X_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[X_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[X_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i64 [[TMP2]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[X:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK1-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[X]], i64* [[X_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[X_ADDR]] to i32* +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[X]], align 4 // CHECK1-NEXT: ret void // // @@ -484,27 +502,30 @@ // CHECK1-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[X_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK1-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[X_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[X_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[X_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP2]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[X:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK1-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[X]], i64* [[X_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[X_ADDR]] to i32* +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[X]], align 4 // CHECK1-NEXT: ret void // // @@ -594,91 +615,97 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca [88 x i32]*, align 8 // CHECK1-NEXT: [[Z_ADDR:%.*]] = alloca [99 x i32]*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: store [88 x i32]* [[Y]], [88 x i32]** [[Y_ADDR]], align 8 // CHECK1-NEXT: store [99 x i32]* [[Z]], [99 x i32]** [[Z_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load [88 x i32]*, [88 x i32]** [[Y_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load [99 x i32]*, [99 x i32]** [[Z_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [88 x i32]*, [99 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), [88 x i32]* [[TMP0]], [99 x i32]* [[TMP1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store [88 x i32]* [[TMP0]], [88 x i32]** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store [99 x i32]* [[TMP1]], [99 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [88 x i32]* noundef nonnull align 4 dereferenceable(352) [[Y:%.*]], [99 x i32]* noundef nonnull align 4 dereferenceable(396) [[Z:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca [88 x i32]*, align 8 -// CHECK1-NEXT: [[Z_ADDR:%.*]] = alloca [99 x i32]*, align 8 -// CHECK1-NEXT: [[Y1:%.*]] = alloca [88 x i32], align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 +// CHECK1-NEXT: [[Y:%.*]] = alloca [88 x i32], align 4 // CHECK1-NEXT: [[X:%.*]] = alloca [77 x i32], align 4 -// CHECK1-NEXT: [[Z2:%.*]] = alloca [99 x i32], align 4 +// CHECK1-NEXT: [[Z:%.*]] = alloca [99 x i32], align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [88 x i32]* [[Y]], [88 x i32]** [[Y_ADDR]], align 8 -// CHECK1-NEXT: store [99 x i32]* [[Z]], [99 x i32]** [[Z_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [88 x i32]*, [88 x i32]** [[Y_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load [99 x i32]*, [99 x i32]** [[Z_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = bitcast [88 x i32]* [[Y1]] to i8* -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast [88 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP2]], i8* align 4 [[TMP3]], i64 352, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], [99 x i32]* [[Z2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr i32, i32* [[ARRAY_BEGIN]], i64 99 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[ARRAY_BEGIN]], [[TMP4]] +// CHECK1-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [88 x i32]*, [88 x i32]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load [99 x i32]*, [99 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [88 x i32]* [[Y]] to i8* +// CHECK1-NEXT: [[TMP6:%.*]] = bitcast [88 x i32]* [[TMP2]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i64 352, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], [99 x i32]* [[Z]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[ARRAY_BEGIN]], i64 99 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[ARRAY_BEGIN]], [[TMP7]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i32* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i32 0, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[LHS_BEGIN:%.*]] = bitcast [99 x i32]* [[TMP1]] to i32* -// CHECK1-NEXT: [[RHS_BEGIN:%.*]] = bitcast [99 x i32]* [[Z2]] to i32* -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i32* [[RHS_BEGIN]] to i8* -// CHECK1-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 1, i64 8, i8* [[TMP9]], void (i8*, i8*)* @.omp.reduction.reduction_func.17, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[LHS_BEGIN:%.*]] = bitcast [99 x i32]* [[TMP4]] to i32* +// CHECK1-NEXT: [[RHS_BEGIN:%.*]] = bitcast [99 x i32]* [[Z]] to i32* +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i32* [[RHS_BEGIN]] to i8* +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 1, i64 8, i8* [[TMP12]], void (i8*, i8*)* @.omp.reduction.reduction_func.17, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[LHS_BEGIN]], i64 99 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[LHS_BEGIN]], [[TMP11]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[LHS_BEGIN]], i64 99 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[LHS_BEGIN]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi i32* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST1:%.*]] = phi i32* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT2:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT2]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST1]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP11]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done6: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE3:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT2]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done4: +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[LHS_BEGIN]], i64 99 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq i32* [[LHS_BEGIN]], [[TMP14]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]] -// CHECK1: omp.arraycpy.body8: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi i32* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = atomicrmw add i32* [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP15]] monotonic, align 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP14]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]] -// CHECK1: omp.arraycpy.done14: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i32, i32* [[LHS_BEGIN]], i64 99 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY5:%.*]] = icmp eq i32* [[LHS_BEGIN]], [[TMP17]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY5]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY6:%.*]] +// CHECK1: omp.arraycpy.body6: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST7:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi i32* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST7]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add i32* [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 [[TMP18]] monotonic, align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT10]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST7]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP17]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY6]] +// CHECK1: omp.arraycpy.done12: +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -724,91 +751,97 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca [88 x i32]*, align 8 // CHECK1-NEXT: [[Z_ADDR:%.*]] = alloca [99 x i32]*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store [88 x i32]* [[Y]], [88 x i32]** [[Y_ADDR]], align 8 // CHECK1-NEXT: store [99 x i32]* [[Z]], [99 x i32]** [[Z_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load [88 x i32]*, [88 x i32]** [[Y_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load [99 x i32]*, [99 x i32]** [[Z_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [88 x i32]*, [99 x i32]*)* @.omp_outlined..20 to void (i32*, i32*, ...)*), [88 x i32]* [[TMP0]], [99 x i32]* [[TMP1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store [88 x i32]* [[TMP0]], [88 x i32]** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store [99 x i32]* [[TMP1]], [99 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..20 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..20 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [88 x i32]* noundef nonnull align 4 dereferenceable(352) [[Y:%.*]], [99 x i32]* noundef nonnull align 4 dereferenceable(396) [[Z:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca [88 x i32]*, align 8 -// CHECK1-NEXT: [[Z_ADDR:%.*]] = alloca [99 x i32]*, align 8 -// CHECK1-NEXT: [[Y1:%.*]] = alloca [88 x i32], align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 +// CHECK1-NEXT: [[Y:%.*]] = alloca [88 x i32], align 4 // CHECK1-NEXT: [[X:%.*]] = alloca [77 x i32], align 4 -// CHECK1-NEXT: [[Z2:%.*]] = alloca [99 x i32], align 4 +// CHECK1-NEXT: [[Z:%.*]] = alloca [99 x i32], align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [88 x i32]* [[Y]], [88 x i32]** [[Y_ADDR]], align 8 -// CHECK1-NEXT: store [99 x i32]* [[Z]], [99 x i32]** [[Z_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [88 x i32]*, [88 x i32]** [[Y_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load [99 x i32]*, [99 x i32]** [[Z_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = bitcast [88 x i32]* [[Y1]] to i8* -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast [88 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP2]], i8* align 4 [[TMP3]], i64 352, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], [99 x i32]* [[Z2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr i32, i32* [[ARRAY_BEGIN]], i64 99 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[ARRAY_BEGIN]], [[TMP4]] +// CHECK1-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [88 x i32]*, [88 x i32]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load [99 x i32]*, [99 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [88 x i32]* [[Y]] to i8* +// CHECK1-NEXT: [[TMP6:%.*]] = bitcast [88 x i32]* [[TMP2]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i64 352, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], [99 x i32]* [[Z]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[ARRAY_BEGIN]], i64 99 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[ARRAY_BEGIN]], [[TMP7]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i32* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i32 0, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[LHS_BEGIN:%.*]] = bitcast [99 x i32]* [[TMP1]] to i32* -// CHECK1-NEXT: [[RHS_BEGIN:%.*]] = bitcast [99 x i32]* [[Z2]] to i32* -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i32* [[RHS_BEGIN]] to i8* -// CHECK1-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 1, i64 8, i8* [[TMP9]], void (i8*, i8*)* @.omp.reduction.reduction_func.21, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[LHS_BEGIN:%.*]] = bitcast [99 x i32]* [[TMP4]] to i32* +// CHECK1-NEXT: [[RHS_BEGIN:%.*]] = bitcast [99 x i32]* [[Z]] to i32* +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i32* [[RHS_BEGIN]] to i8* +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 1, i64 8, i8* [[TMP12]], void (i8*, i8*)* @.omp.reduction.reduction_func.21, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[LHS_BEGIN]], i64 99 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[LHS_BEGIN]], [[TMP11]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[LHS_BEGIN]], i64 99 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[LHS_BEGIN]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi i32* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST1:%.*]] = phi i32* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT2:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT2]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST1]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP11]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done6: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE3:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT2]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done4: +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[LHS_BEGIN]], i64 99 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq i32* [[LHS_BEGIN]], [[TMP14]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]] -// CHECK1: omp.arraycpy.body8: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi i32* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = atomicrmw add i32* [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP15]] monotonic, align 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP14]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]] -// CHECK1: omp.arraycpy.done14: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i32, i32* [[LHS_BEGIN]], i64 99 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY5:%.*]] = icmp eq i32* [[LHS_BEGIN]], [[TMP17]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY5]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY6:%.*]] +// CHECK1: omp.arraycpy.body6: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST7:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi i32* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST7]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add i32* [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 [[TMP18]] monotonic, align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT10]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST7]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP17]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY6]] +// CHECK1: omp.arraycpy.done12: +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -935,75 +968,81 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca i128*, align 8 // CHECK1-NEXT: [[Z_ADDR:%.*]] = alloca i128*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store i128* [[Y]], i128** [[Y_ADDR]], align 8 // CHECK1-NEXT: store i128* [[Z]], i128** [[Z_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i128*, i128** [[Y_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i128*, i128** [[Z_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i128*, i128*)* @.omp_outlined..24 to void (i32*, i32*, ...)*), i128* [[TMP0]], i128* [[TMP1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i128* [[TMP0]], i128** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i128* [[TMP1]], i128** [[TMP3]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..24 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..24 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i128* noundef nonnull align 16 dereferenceable(16) [[Y:%.*]], i128* noundef nonnull align 16 dereferenceable(16) [[Z:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca i128*, align 8 -// CHECK1-NEXT: [[Z_ADDR:%.*]] = alloca i128*, align 8 -// CHECK1-NEXT: [[Y1:%.*]] = alloca i128, align 16 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 +// CHECK1-NEXT: [[Y:%.*]] = alloca i128, align 16 // CHECK1-NEXT: [[X:%.*]] = alloca i128, align 16 -// CHECK1-NEXT: [[Z2:%.*]] = alloca i128, align 16 +// CHECK1-NEXT: [[Z:%.*]] = alloca i128, align 16 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i128, align 16 -// CHECK1-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i128, align 16 +// CHECK1-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i128, align 16 // CHECK1-NEXT: [[TMP:%.*]] = alloca i128, align 16 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i128* [[Y]], i128** [[Y_ADDR]], align 8 -// CHECK1-NEXT: store i128* [[Z]], i128** [[Z_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i128*, i128** [[Y_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i128*, i128** [[Z_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i128, i128* [[TMP0]], align 16 -// CHECK1-NEXT: store i128 [[TMP2]], i128* [[Y1]], align 16 -// CHECK1-NEXT: store i128 0, i128* [[Z2]], align 16 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i128* [[Z2]] to i8* -// CHECK1-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 1, i64 8, i8* [[TMP7]], void (i8*, i8*)* @.omp.reduction.reduction_func.25, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i128*, i128** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i128*, i128** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i128, i128* [[TMP2]], align 16 +// CHECK1-NEXT: store i128 [[TMP5]], i128* [[Y]], align 16 +// CHECK1-NEXT: store i128 0, i128* [[Z]], align 16 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i128* [[Z]] to i8* +// CHECK1-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 1, i64 8, i8* [[TMP10]], void (i8*, i8*)* @.omp.reduction.reduction_func.25, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP11]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP9:%.*]] = load i128, i128* [[TMP1]], align 16 -// CHECK1-NEXT: [[TMP10:%.*]] = load i128, i128* [[Z2]], align 16 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: store i128 [[ADD]], i128* [[TMP1]], align 16 -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP12:%.*]] = load i128, i128* [[TMP4]], align 16 +// CHECK1-NEXT: [[TMP13:%.*]] = load i128, i128* [[Z]], align 16 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: store i128 [[ADD]], i128* [[TMP4]], align 16 +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP11:%.*]] = load i128, i128* [[Z2]], align 16 -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i128* [[TMP1]] to i8* -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast i128* [[ATOMIC_TEMP]] to i8* -// CHECK1-NEXT: call void @__atomic_load(i64 noundef 16, i8* noundef [[TMP12]], i8* noundef [[TMP13]], i32 noundef signext 0) +// CHECK1-NEXT: [[TMP14:%.*]] = load i128, i128* [[Z]], align 16 +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i128* [[TMP4]] to i8* +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast i128* [[ATOMIC_TEMP]] to i8* +// CHECK1-NEXT: call void @__atomic_load(i64 noundef 16, i8* noundef [[TMP15]], i8* noundef [[TMP16]], i32 noundef signext 0) // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP14:%.*]] = load i128, i128* [[ATOMIC_TEMP]], align 16 -// CHECK1-NEXT: store i128 [[TMP14]], i128* [[TMP]], align 16 -// CHECK1-NEXT: [[TMP15:%.*]] = load i128, i128* [[TMP]], align 16 -// CHECK1-NEXT: [[TMP16:%.*]] = load i128, i128* [[Z2]], align 16 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i128 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: store i128 [[ADD4]], i128* [[ATOMIC_TEMP3]], align 16 -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i128* [[TMP1]] to i8* -// CHECK1-NEXT: [[TMP18:%.*]] = bitcast i128* [[ATOMIC_TEMP]] to i8* -// CHECK1-NEXT: [[TMP19:%.*]] = bitcast i128* [[ATOMIC_TEMP3]] to i8* -// CHECK1-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 16, i8* noundef [[TMP17]], i8* noundef [[TMP18]], i8* noundef [[TMP19]], i32 noundef signext 0, i32 noundef signext 0) +// CHECK1-NEXT: [[TMP17:%.*]] = load i128, i128* [[ATOMIC_TEMP]], align 16 +// CHECK1-NEXT: store i128 [[TMP17]], i128* [[TMP]], align 16 +// CHECK1-NEXT: [[TMP18:%.*]] = load i128, i128* [[TMP]], align 16 +// CHECK1-NEXT: [[TMP19:%.*]] = load i128, i128* [[Z]], align 16 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i128 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: store i128 [[ADD2]], i128* [[ATOMIC_TEMP1]], align 16 +// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i128* [[TMP4]] to i8* +// CHECK1-NEXT: [[TMP21:%.*]] = bitcast i128* [[ATOMIC_TEMP]] to i8* +// CHECK1-NEXT: [[TMP22:%.*]] = bitcast i128* [[ATOMIC_TEMP1]] to i8* +// CHECK1-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 16, i8* noundef [[TMP20]], i8* noundef [[TMP21]], i8* noundef [[TMP22]], i32 noundef signext 0, i32 noundef signext 0) // CHECK1-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -1038,75 +1077,81 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca i128*, align 8 // CHECK1-NEXT: [[Z_ADDR:%.*]] = alloca i128*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store i128* [[Y]], i128** [[Y_ADDR]], align 8 // CHECK1-NEXT: store i128* [[Z]], i128** [[Z_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i128*, i128** [[Y_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i128*, i128** [[Z_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i128*, i128*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i128* [[TMP0]], i128* [[TMP1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i128* [[TMP0]], i128** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i128* [[TMP1]], i128** [[TMP3]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..28 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i128* noundef nonnull align 16 dereferenceable(16) [[Y:%.*]], i128* noundef nonnull align 16 dereferenceable(16) [[Z:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca i128*, align 8 -// CHECK1-NEXT: [[Z_ADDR:%.*]] = alloca i128*, align 8 -// CHECK1-NEXT: [[Y1:%.*]] = alloca i128, align 16 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK1-NEXT: [[Y:%.*]] = alloca i128, align 16 // CHECK1-NEXT: [[X:%.*]] = alloca i128, align 16 -// CHECK1-NEXT: [[Z2:%.*]] = alloca i128, align 16 +// CHECK1-NEXT: [[Z:%.*]] = alloca i128, align 16 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i128, align 16 -// CHECK1-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i128, align 16 +// CHECK1-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i128, align 16 // CHECK1-NEXT: [[TMP:%.*]] = alloca i128, align 16 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i128* [[Y]], i128** [[Y_ADDR]], align 8 -// CHECK1-NEXT: store i128* [[Z]], i128** [[Z_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i128*, i128** [[Y_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i128*, i128** [[Z_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i128, i128* [[TMP0]], align 16 -// CHECK1-NEXT: store i128 [[TMP2]], i128* [[Y1]], align 16 -// CHECK1-NEXT: store i128 0, i128* [[Z2]], align 16 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i128* [[Z2]] to i8* -// CHECK1-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 1, i64 8, i8* [[TMP7]], void (i8*, i8*)* @.omp.reduction.reduction_func.29, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i128*, i128** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i128*, i128** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i128, i128* [[TMP2]], align 16 +// CHECK1-NEXT: store i128 [[TMP5]], i128* [[Y]], align 16 +// CHECK1-NEXT: store i128 0, i128* [[Z]], align 16 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i128* [[Z]] to i8* +// CHECK1-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 1, i64 8, i8* [[TMP10]], void (i8*, i8*)* @.omp.reduction.reduction_func.29, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP11]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP9:%.*]] = load i128, i128* [[TMP1]], align 16 -// CHECK1-NEXT: [[TMP10:%.*]] = load i128, i128* [[Z2]], align 16 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: store i128 [[ADD]], i128* [[TMP1]], align 16 -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP12:%.*]] = load i128, i128* [[TMP4]], align 16 +// CHECK1-NEXT: [[TMP13:%.*]] = load i128, i128* [[Z]], align 16 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: store i128 [[ADD]], i128* [[TMP4]], align 16 +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP11:%.*]] = load i128, i128* [[Z2]], align 16 -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i128* [[TMP1]] to i8* -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast i128* [[ATOMIC_TEMP]] to i8* -// CHECK1-NEXT: call void @__atomic_load(i64 noundef 16, i8* noundef [[TMP12]], i8* noundef [[TMP13]], i32 noundef signext 0) +// CHECK1-NEXT: [[TMP14:%.*]] = load i128, i128* [[Z]], align 16 +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i128* [[TMP4]] to i8* +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast i128* [[ATOMIC_TEMP]] to i8* +// CHECK1-NEXT: call void @__atomic_load(i64 noundef 16, i8* noundef [[TMP15]], i8* noundef [[TMP16]], i32 noundef signext 0) // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP14:%.*]] = load i128, i128* [[ATOMIC_TEMP]], align 16 -// CHECK1-NEXT: store i128 [[TMP14]], i128* [[TMP]], align 16 -// CHECK1-NEXT: [[TMP15:%.*]] = load i128, i128* [[TMP]], align 16 -// CHECK1-NEXT: [[TMP16:%.*]] = load i128, i128* [[Z2]], align 16 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i128 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: store i128 [[ADD4]], i128* [[ATOMIC_TEMP3]], align 16 -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i128* [[TMP1]] to i8* -// CHECK1-NEXT: [[TMP18:%.*]] = bitcast i128* [[ATOMIC_TEMP]] to i8* -// CHECK1-NEXT: [[TMP19:%.*]] = bitcast i128* [[ATOMIC_TEMP3]] to i8* -// CHECK1-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 16, i8* noundef [[TMP17]], i8* noundef [[TMP18]], i8* noundef [[TMP19]], i32 noundef signext 0, i32 noundef signext 0) +// CHECK1-NEXT: [[TMP17:%.*]] = load i128, i128* [[ATOMIC_TEMP]], align 16 +// CHECK1-NEXT: store i128 [[TMP17]], i128* [[TMP]], align 16 +// CHECK1-NEXT: [[TMP18:%.*]] = load i128, i128* [[TMP]], align 16 +// CHECK1-NEXT: [[TMP19:%.*]] = load i128, i128* [[Z]], align 16 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i128 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: store i128 [[ADD2]], i128* [[ATOMIC_TEMP1]], align 16 +// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i128* [[TMP4]] to i8* +// CHECK1-NEXT: [[TMP21:%.*]] = bitcast i128* [[ATOMIC_TEMP]] to i8* +// CHECK1-NEXT: [[TMP22:%.*]] = bitcast i128* [[ATOMIC_TEMP1]] to i8* +// CHECK1-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 16, i8* noundef [[TMP20]], i8* noundef [[TMP21]], i8* noundef [[TMP22]], i32 noundef signext 0, i32 noundef signext 0) // CHECK1-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -1182,19 +1227,23 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14mapWithPrivatev_l27 // CHECK3-SAME: () #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -1239,33 +1288,39 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[Y_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[X_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[Y_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 4 // CHECK3-NEXT: store i32* [[Y]], i32** [[Y_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[X_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[Y_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[X_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[X_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[Y_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[Y_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP5]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK3-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[Y_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK3-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[X]], i32* [[X_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[Y]], i32* [[Y_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[X]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[Y]], align 4 // CHECK3-NEXT: ret void // // @@ -1310,63 +1365,69 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[Y_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 4 // CHECK3-NEXT: store i32* [[Y]], i32** [[Y_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[X_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[Y_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* [[TMP0]], i32* [[TMP1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[TMP0]], i32** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[TMP1]], i32** [[TMP3]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[X:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[Y:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[Y_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[X1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[Y2:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK3-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[Y]], i32** [[Y_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[X_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[Y_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[X1]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[Y2]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = bitcast i32* [[X1]] to i8* -// CHECK3-NEXT: store i8* [[TMP3]], i8** [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i32* [[Y2]] to i8* -// CHECK3-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 2, i32 8, i8* [[TMP8]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[X]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[Y]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP6:%.*]] = bitcast i32* [[X]] to i8* +// CHECK3-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i32* [[Y]] to i8* +// CHECK3-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 2, i32 8, i8* [[TMP11]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP12]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[X1]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[Y2]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[X]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[Y]], align 4 +// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: store i32 [[ADD1]], i32* [[TMP4]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[X1]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP14]] monotonic, align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[Y2]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add i32* [[TMP1]], i32 [[TMP16]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[X]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP17]] monotonic, align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[Y]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = atomicrmw add i32* [[TMP4]], i32 [[TMP19]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -1437,25 +1498,30 @@ // CHECK3-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[X_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[X_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[X_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[X_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[X:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK3-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[X]], i32* [[X_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[X]], align 4 // CHECK3-NEXT: ret void // // @@ -1490,25 +1556,30 @@ // CHECK3-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[X_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[X_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[X_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[X_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[X:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK3-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[X]], i32* [[X_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[X]], align 4 // CHECK3-NEXT: ret void // // @@ -1543,25 +1614,30 @@ // CHECK3-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[X_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[X_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[X_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[X_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[X:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK3-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[X]], i32* [[X_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[X]], align 4 // CHECK3-NEXT: ret void // // @@ -1651,91 +1727,97 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[Y_ADDR:%.*]] = alloca [88 x i32]*, align 4 // CHECK3-NEXT: [[Z_ADDR:%.*]] = alloca [99 x i32]*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store [88 x i32]* [[Y]], [88 x i32]** [[Y_ADDR]], align 4 // CHECK3-NEXT: store [99 x i32]* [[Z]], [99 x i32]** [[Z_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [88 x i32]*, [88 x i32]** [[Y_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load [99 x i32]*, [99 x i32]** [[Z_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [88 x i32]*, [99 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), [88 x i32]* [[TMP0]], [99 x i32]* [[TMP1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store [88 x i32]* [[TMP0]], [88 x i32]** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store [99 x i32]* [[TMP1]], [99 x i32]** [[TMP3]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [88 x i32]* noundef nonnull align 4 dereferenceable(352) [[Y:%.*]], [99 x i32]* noundef nonnull align 4 dereferenceable(396) [[Z:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[Y_ADDR:%.*]] = alloca [88 x i32]*, align 4 -// CHECK3-NEXT: [[Z_ADDR:%.*]] = alloca [99 x i32]*, align 4 -// CHECK3-NEXT: [[Y1:%.*]] = alloca [88 x i32], align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 +// CHECK3-NEXT: [[Y:%.*]] = alloca [88 x i32], align 4 // CHECK3-NEXT: [[X:%.*]] = alloca [77 x i32], align 4 -// CHECK3-NEXT: [[Z2:%.*]] = alloca [99 x i32], align 4 +// CHECK3-NEXT: [[Z:%.*]] = alloca [99 x i32], align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store [88 x i32]* [[Y]], [88 x i32]** [[Y_ADDR]], align 4 -// CHECK3-NEXT: store [99 x i32]* [[Z]], [99 x i32]** [[Z_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [88 x i32]*, [88 x i32]** [[Y_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [99 x i32]*, [99 x i32]** [[Z_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = bitcast [88 x i32]* [[Y1]] to i8* -// CHECK3-NEXT: [[TMP3:%.*]] = bitcast [88 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP2]], i8* align 4 [[TMP3]], i32 352, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], [99 x i32]* [[Z2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr i32, i32* [[ARRAY_BEGIN]], i32 99 -// CHECK3-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[ARRAY_BEGIN]], [[TMP4]] +// CHECK3-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load [88 x i32]*, [88 x i32]** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load [99 x i32]*, [99 x i32]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = bitcast [88 x i32]* [[Y]] to i8* +// CHECK3-NEXT: [[TMP6:%.*]] = bitcast [88 x i32]* [[TMP2]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i32 352, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], [99 x i32]* [[Z]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[ARRAY_BEGIN]], i32 99 +// CHECK3-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[ARRAY_BEGIN]], [[TMP7]] // CHECK3-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK3: omp.arrayinit.body: // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i32* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK3-NEXT: store i32 0, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK3: omp.arrayinit.done: -// CHECK3-NEXT: [[LHS_BEGIN:%.*]] = bitcast [99 x i32]* [[TMP1]] to i32* -// CHECK3-NEXT: [[RHS_BEGIN:%.*]] = bitcast [99 x i32]* [[Z2]] to i32* -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = bitcast i32* [[RHS_BEGIN]] to i8* -// CHECK3-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 1, i32 4, i8* [[TMP9]], void (i8*, i8*)* @.omp.reduction.reduction_func.17, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[LHS_BEGIN:%.*]] = bitcast [99 x i32]* [[TMP4]] to i32* +// CHECK3-NEXT: [[RHS_BEGIN:%.*]] = bitcast [99 x i32]* [[Z]] to i32* +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = bitcast i32* [[RHS_BEGIN]] to i8* +// CHECK3-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 1, i32 4, i8* [[TMP12]], void (i8*, i8*)* @.omp.reduction.reduction_func.17, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[LHS_BEGIN]], i32 99 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[LHS_BEGIN]], [[TMP11]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[LHS_BEGIN]], i32 99 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[LHS_BEGIN]], [[TMP14]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: // CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi i32* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST1:%.*]] = phi i32* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT2:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT2]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST1]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP11]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done6: -// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE3:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT2]], [[TMP14]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done4: +// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[LHS_BEGIN]], i32 99 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq i32* [[LHS_BEGIN]], [[TMP14]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]] -// CHECK3: omp.arraycpy.body8: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi i32* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = atomicrmw add i32* [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP15]] monotonic, align 4 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP14]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]] -// CHECK3: omp.arraycpy.done14: -// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr i32, i32* [[LHS_BEGIN]], i32 99 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY5:%.*]] = icmp eq i32* [[LHS_BEGIN]], [[TMP17]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY5]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY6:%.*]] +// CHECK3: omp.arraycpy.body6: +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST7:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi i32* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST7]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add i32* [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT10]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST7]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP17]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY6]] +// CHECK3: omp.arraycpy.done12: +// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -1781,91 +1863,97 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[Y_ADDR:%.*]] = alloca [88 x i32]*, align 4 // CHECK3-NEXT: [[Z_ADDR:%.*]] = alloca [99 x i32]*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store [88 x i32]* [[Y]], [88 x i32]** [[Y_ADDR]], align 4 // CHECK3-NEXT: store [99 x i32]* [[Z]], [99 x i32]** [[Z_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [88 x i32]*, [88 x i32]** [[Y_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load [99 x i32]*, [99 x i32]** [[Z_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [88 x i32]*, [99 x i32]*)* @.omp_outlined..20 to void (i32*, i32*, ...)*), [88 x i32]* [[TMP0]], [99 x i32]* [[TMP1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store [88 x i32]* [[TMP0]], [88 x i32]** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store [99 x i32]* [[TMP1]], [99 x i32]** [[TMP3]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..20 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..20 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [88 x i32]* noundef nonnull align 4 dereferenceable(352) [[Y:%.*]], [99 x i32]* noundef nonnull align 4 dereferenceable(396) [[Z:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[Y_ADDR:%.*]] = alloca [88 x i32]*, align 4 -// CHECK3-NEXT: [[Z_ADDR:%.*]] = alloca [99 x i32]*, align 4 -// CHECK3-NEXT: [[Y1:%.*]] = alloca [88 x i32], align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 +// CHECK3-NEXT: [[Y:%.*]] = alloca [88 x i32], align 4 // CHECK3-NEXT: [[X:%.*]] = alloca [77 x i32], align 4 -// CHECK3-NEXT: [[Z2:%.*]] = alloca [99 x i32], align 4 +// CHECK3-NEXT: [[Z:%.*]] = alloca [99 x i32], align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store [88 x i32]* [[Y]], [88 x i32]** [[Y_ADDR]], align 4 -// CHECK3-NEXT: store [99 x i32]* [[Z]], [99 x i32]** [[Z_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [88 x i32]*, [88 x i32]** [[Y_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [99 x i32]*, [99 x i32]** [[Z_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = bitcast [88 x i32]* [[Y1]] to i8* -// CHECK3-NEXT: [[TMP3:%.*]] = bitcast [88 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP2]], i8* align 4 [[TMP3]], i32 352, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], [99 x i32]* [[Z2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr i32, i32* [[ARRAY_BEGIN]], i32 99 -// CHECK3-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[ARRAY_BEGIN]], [[TMP4]] +// CHECK3-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load [88 x i32]*, [88 x i32]** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load [99 x i32]*, [99 x i32]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = bitcast [88 x i32]* [[Y]] to i8* +// CHECK3-NEXT: [[TMP6:%.*]] = bitcast [88 x i32]* [[TMP2]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i32 352, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], [99 x i32]* [[Z]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[ARRAY_BEGIN]], i32 99 +// CHECK3-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[ARRAY_BEGIN]], [[TMP7]] // CHECK3-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK3: omp.arrayinit.body: // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i32* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK3-NEXT: store i32 0, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK3: omp.arrayinit.done: -// CHECK3-NEXT: [[LHS_BEGIN:%.*]] = bitcast [99 x i32]* [[TMP1]] to i32* -// CHECK3-NEXT: [[RHS_BEGIN:%.*]] = bitcast [99 x i32]* [[Z2]] to i32* -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = bitcast i32* [[RHS_BEGIN]] to i8* -// CHECK3-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 1, i32 4, i8* [[TMP9]], void (i8*, i8*)* @.omp.reduction.reduction_func.21, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[LHS_BEGIN:%.*]] = bitcast [99 x i32]* [[TMP4]] to i32* +// CHECK3-NEXT: [[RHS_BEGIN:%.*]] = bitcast [99 x i32]* [[Z]] to i32* +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = bitcast i32* [[RHS_BEGIN]] to i8* +// CHECK3-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 1, i32 4, i8* [[TMP12]], void (i8*, i8*)* @.omp.reduction.reduction_func.21, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[LHS_BEGIN]], i32 99 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[LHS_BEGIN]], [[TMP11]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[LHS_BEGIN]], i32 99 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[LHS_BEGIN]], [[TMP14]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: // CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi i32* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST1:%.*]] = phi i32* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT2:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT2]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST1]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP11]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done6: -// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE3:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT2]], [[TMP14]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done4: +// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[LHS_BEGIN]], i32 99 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq i32* [[LHS_BEGIN]], [[TMP14]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]] -// CHECK3: omp.arraycpy.body8: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi i32* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = atomicrmw add i32* [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP15]] monotonic, align 4 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP14]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]] -// CHECK3: omp.arraycpy.done14: -// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr i32, i32* [[LHS_BEGIN]], i32 99 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY5:%.*]] = icmp eq i32* [[LHS_BEGIN]], [[TMP17]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY5]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY6:%.*]] +// CHECK3: omp.arraycpy.body6: +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST7:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi i32* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST7]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add i32* [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT10]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST7]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP17]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY6]] +// CHECK3: omp.arraycpy.done12: +// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -1916,19 +2004,23 @@ // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14mapWithPrivatev_l27 // CHECK5-SAME: () #[[ATTR0:[0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK5-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: ret void // // @@ -1937,37 +2029,39 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[X_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[Y_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK5-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 8 // CHECK5-NEXT: store i32* [[Y]], i32** [[Y_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[X_ADDR]], align 8 // CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[Y_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[X_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP2]], i32* [[CONV]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i64, i64* [[X_CASTED]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[Y_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[CONV1]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[Y_CASTED]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP5]]) +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK5-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[X:%.*]], i64 noundef [[Y:%.*]]) #[[ATTR0]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK5-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[X]], i64* [[X_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[Y]], i64* [[Y_ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[X_ADDR]] to i32* -// CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[Y_ADDR]] to i32* +// CHECK5-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[X]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: store i32 [[TMP4]], i32* [[Y]], align 4 // CHECK5-NEXT: ret void // // @@ -1976,63 +2070,69 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK5-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 8 // CHECK5-NEXT: store i32* [[Y]], i32** [[Y_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[X_ADDR]], align 8 // CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[Y_ADDR]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[TMP0]], i32* [[TMP1]]) +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i32* [[TMP0]], i32** [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i32* [[TMP1]], i32** [[TMP3]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[X:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[Y:%.*]]) #[[ATTR0]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[X1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[Y2:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK5-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 8 -// CHECK5-NEXT: store i32* [[Y]], i32** [[Y_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[X_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[Y_ADDR]], align 8 -// CHECK5-NEXT: store i32 0, i32* [[X1]], align 4 -// CHECK5-NEXT: store i32 0, i32* [[Y2]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK5-NEXT: [[TMP3:%.*]] = bitcast i32* [[X1]] to i8* -// CHECK5-NEXT: store i8* [[TMP3]], i8** [[TMP2]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK5-NEXT: [[TMP5:%.*]] = bitcast i32* [[Y2]] to i8* -// CHECK5-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK5-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 2, i64 16, i8* [[TMP8]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK5-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK5-NEXT: store i32 0, i32* [[X]], align 4 +// CHECK5-NEXT: store i32 0, i32* [[Y]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP6:%.*]] = bitcast i32* [[X]] to i8* +// CHECK5-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK5-NEXT: [[TMP8:%.*]] = bitcast i32* [[Y]] to i8* +// CHECK5-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK5-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 2, i64 16, i8* [[TMP11]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP12]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[X1]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[Y2]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: store i32 [[ADD3]], i32* [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[X]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[Y]], align 4 +// CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK5-NEXT: store i32 [[ADD1]], i32* [[TMP4]], align 4 +// CHECK5-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[X1]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP14]] monotonic, align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[Y2]], align 4 -// CHECK5-NEXT: [[TMP17:%.*]] = atomicrmw add i32* [[TMP1]], i32 [[TMP16]] monotonic, align 4 -// CHECK5-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[X]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP17]] monotonic, align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[Y]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = atomicrmw add i32* [[TMP4]], i32 [[TMP19]] monotonic, align 4 +// CHECK5-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: // CHECK5-NEXT: ret void @@ -2076,27 +2176,30 @@ // CHECK5-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[X_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK5-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[X_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[X_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[CONV]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[X_CASTED]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP2]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[X:%.*]]) #[[ATTR0]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK5-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[X]], i64* [[X_ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[X_ADDR]] to i32* +// CHECK5-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[X]], align 4 // CHECK5-NEXT: ret void // // @@ -2104,27 +2207,30 @@ // CHECK5-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[X_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK5-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[X_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[X_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[CONV]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[X_CASTED]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i64 [[TMP2]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[X:%.*]]) #[[ATTR0]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK5-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[X]], i64* [[X_ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[X_ADDR]] to i32* +// CHECK5-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[X]], align 4 // CHECK5-NEXT: ret void // // @@ -2132,27 +2238,30 @@ // CHECK5-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[X_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK5-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[X_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[X_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[CONV]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[X_CASTED]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP2]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[X:%.*]]) #[[ATTR0]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK5-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[X]], i64* [[X_ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[X_ADDR]] to i32* +// CHECK5-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[X]], align 4 // CHECK5-NEXT: ret void // // @@ -2161,91 +2270,97 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca [88 x i32]*, align 8 // CHECK5-NEXT: [[Z_ADDR:%.*]] = alloca [99 x i32]*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK5-NEXT: store [88 x i32]* [[Y]], [88 x i32]** [[Y_ADDR]], align 8 // CHECK5-NEXT: store [99 x i32]* [[Z]], [99 x i32]** [[Z_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load [88 x i32]*, [88 x i32]** [[Y_ADDR]], align 8 // CHECK5-NEXT: [[TMP1:%.*]] = load [99 x i32]*, [99 x i32]** [[Z_ADDR]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [88 x i32]*, [99 x i32]*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), [88 x i32]* [[TMP0]], [99 x i32]* [[TMP1]]) +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store [88 x i32]* [[TMP0]], [88 x i32]** [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store [99 x i32]* [[TMP1]], [99 x i32]** [[TMP3]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [88 x i32]* noundef nonnull align 4 dereferenceable(352) [[Y:%.*]], [99 x i32]* noundef nonnull align 4 dereferenceable(396) [[Z:%.*]]) #[[ATTR0]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca [88 x i32]*, align 8 -// CHECK5-NEXT: [[Z_ADDR:%.*]] = alloca [99 x i32]*, align 8 -// CHECK5-NEXT: [[Y1:%.*]] = alloca [88 x i32], align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 +// CHECK5-NEXT: [[Y:%.*]] = alloca [88 x i32], align 4 // CHECK5-NEXT: [[X:%.*]] = alloca [77 x i32], align 4 -// CHECK5-NEXT: [[Z2:%.*]] = alloca [99 x i32], align 4 +// CHECK5-NEXT: [[Z:%.*]] = alloca [99 x i32], align 4 // CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store [88 x i32]* [[Y]], [88 x i32]** [[Y_ADDR]], align 8 -// CHECK5-NEXT: store [99 x i32]* [[Z]], [99 x i32]** [[Z_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load [88 x i32]*, [88 x i32]** [[Y_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load [99 x i32]*, [99 x i32]** [[Z_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = bitcast [88 x i32]* [[Y1]] to i8* -// CHECK5-NEXT: [[TMP3:%.*]] = bitcast [88 x i32]* [[TMP0]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP2]], i8* align 4 [[TMP3]], i64 352, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], [99 x i32]* [[Z2]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr i32, i32* [[ARRAY_BEGIN]], i64 99 -// CHECK5-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[ARRAY_BEGIN]], [[TMP4]] +// CHECK5-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load [88 x i32]*, [88 x i32]** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load [99 x i32]*, [99 x i32]** [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = bitcast [88 x i32]* [[Y]] to i8* +// CHECK5-NEXT: [[TMP6:%.*]] = bitcast [88 x i32]* [[TMP2]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i64 352, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], [99 x i32]* [[Z]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[ARRAY_BEGIN]], i64 99 +// CHECK5-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[ARRAY_BEGIN]], [[TMP7]] // CHECK5-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK5: omp.arrayinit.body: // CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i32* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK5-NEXT: store i32 0, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] // CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK5: omp.arrayinit.done: -// CHECK5-NEXT: [[LHS_BEGIN:%.*]] = bitcast [99 x i32]* [[TMP1]] to i32* -// CHECK5-NEXT: [[RHS_BEGIN:%.*]] = bitcast [99 x i32]* [[Z2]] to i32* -// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK5-NEXT: [[TMP6:%.*]] = bitcast i32* [[RHS_BEGIN]] to i8* -// CHECK5-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK5-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 1, i64 8, i8* [[TMP9]], void (i8*, i8*)* @.omp.reduction.reduction_func.7, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK5-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: [[LHS_BEGIN:%.*]] = bitcast [99 x i32]* [[TMP4]] to i32* +// CHECK5-NEXT: [[RHS_BEGIN:%.*]] = bitcast [99 x i32]* [[Z]] to i32* +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP9:%.*]] = bitcast i32* [[RHS_BEGIN]] to i8* +// CHECK5-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK5-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 1, i64 8, i8* [[TMP12]], void (i8*, i8*)* @.omp.reduction.reduction_func.7, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[LHS_BEGIN]], i64 99 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[LHS_BEGIN]], [[TMP11]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[LHS_BEGIN]], i64 99 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[LHS_BEGIN]], [[TMP14]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: // CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi i32* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST1:%.*]] = phi i32* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT2:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT2]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST1]], i32 1 // CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP11]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done6: -// CHECK5-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE3:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT2]], [[TMP14]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done4: +// CHECK5-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[LHS_BEGIN]], i64 99 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq i32* [[LHS_BEGIN]], [[TMP14]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]] -// CHECK5: omp.arraycpy.body8: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi i32* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = atomicrmw add i32* [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP15]] monotonic, align 4 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP14]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]] -// CHECK5: omp.arraycpy.done14: -// CHECK5-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr i32, i32* [[LHS_BEGIN]], i64 99 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY5:%.*]] = icmp eq i32* [[LHS_BEGIN]], [[TMP17]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY5]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY6:%.*]] +// CHECK5: omp.arraycpy.body6: +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST7:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi i32* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST7]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = atomicrmw add i32* [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 [[TMP18]] monotonic, align 4 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT10]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST7]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP17]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY6]] +// CHECK5: omp.arraycpy.done12: +// CHECK5-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: // CHECK5-NEXT: ret void @@ -2291,91 +2406,97 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca [88 x i32]*, align 8 // CHECK5-NEXT: [[Z_ADDR:%.*]] = alloca [99 x i32]*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK5-NEXT: store [88 x i32]* [[Y]], [88 x i32]** [[Y_ADDR]], align 8 // CHECK5-NEXT: store [99 x i32]* [[Z]], [99 x i32]** [[Z_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load [88 x i32]*, [88 x i32]** [[Y_ADDR]], align 8 // CHECK5-NEXT: [[TMP1:%.*]] = load [99 x i32]*, [99 x i32]** [[Z_ADDR]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [88 x i32]*, [99 x i32]*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), [88 x i32]* [[TMP0]], [99 x i32]* [[TMP1]]) +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store [88 x i32]* [[TMP0]], [88 x i32]** [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store [99 x i32]* [[TMP1]], [99 x i32]** [[TMP3]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [88 x i32]* noundef nonnull align 4 dereferenceable(352) [[Y:%.*]], [99 x i32]* noundef nonnull align 4 dereferenceable(396) [[Z:%.*]]) #[[ATTR0]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca [88 x i32]*, align 8 -// CHECK5-NEXT: [[Z_ADDR:%.*]] = alloca [99 x i32]*, align 8 -// CHECK5-NEXT: [[Y1:%.*]] = alloca [88 x i32], align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 +// CHECK5-NEXT: [[Y:%.*]] = alloca [88 x i32], align 4 // CHECK5-NEXT: [[X:%.*]] = alloca [77 x i32], align 4 -// CHECK5-NEXT: [[Z2:%.*]] = alloca [99 x i32], align 4 +// CHECK5-NEXT: [[Z:%.*]] = alloca [99 x i32], align 4 // CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store [88 x i32]* [[Y]], [88 x i32]** [[Y_ADDR]], align 8 -// CHECK5-NEXT: store [99 x i32]* [[Z]], [99 x i32]** [[Z_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load [88 x i32]*, [88 x i32]** [[Y_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load [99 x i32]*, [99 x i32]** [[Z_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = bitcast [88 x i32]* [[Y1]] to i8* -// CHECK5-NEXT: [[TMP3:%.*]] = bitcast [88 x i32]* [[TMP0]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP2]], i8* align 4 [[TMP3]], i64 352, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], [99 x i32]* [[Z2]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr i32, i32* [[ARRAY_BEGIN]], i64 99 -// CHECK5-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[ARRAY_BEGIN]], [[TMP4]] +// CHECK5-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load [88 x i32]*, [88 x i32]** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load [99 x i32]*, [99 x i32]** [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = bitcast [88 x i32]* [[Y]] to i8* +// CHECK5-NEXT: [[TMP6:%.*]] = bitcast [88 x i32]* [[TMP2]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i64 352, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], [99 x i32]* [[Z]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[ARRAY_BEGIN]], i64 99 +// CHECK5-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[ARRAY_BEGIN]], [[TMP7]] // CHECK5-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK5: omp.arrayinit.body: // CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i32* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK5-NEXT: store i32 0, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] // CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK5: omp.arrayinit.done: -// CHECK5-NEXT: [[LHS_BEGIN:%.*]] = bitcast [99 x i32]* [[TMP1]] to i32* -// CHECK5-NEXT: [[RHS_BEGIN:%.*]] = bitcast [99 x i32]* [[Z2]] to i32* -// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK5-NEXT: [[TMP6:%.*]] = bitcast i32* [[RHS_BEGIN]] to i8* -// CHECK5-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK5-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 1, i64 8, i8* [[TMP9]], void (i8*, i8*)* @.omp.reduction.reduction_func.9, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK5-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: [[LHS_BEGIN:%.*]] = bitcast [99 x i32]* [[TMP4]] to i32* +// CHECK5-NEXT: [[RHS_BEGIN:%.*]] = bitcast [99 x i32]* [[Z]] to i32* +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP9:%.*]] = bitcast i32* [[RHS_BEGIN]] to i8* +// CHECK5-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK5-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 1, i64 8, i8* [[TMP12]], void (i8*, i8*)* @.omp.reduction.reduction_func.9, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[LHS_BEGIN]], i64 99 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[LHS_BEGIN]], [[TMP11]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[LHS_BEGIN]], i64 99 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[LHS_BEGIN]], [[TMP14]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: // CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi i32* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST1:%.*]] = phi i32* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT2:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT2]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST1]], i32 1 // CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP11]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done6: -// CHECK5-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE3:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT2]], [[TMP14]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done4: +// CHECK5-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[LHS_BEGIN]], i64 99 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq i32* [[LHS_BEGIN]], [[TMP14]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]] -// CHECK5: omp.arraycpy.body8: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi i32* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = atomicrmw add i32* [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP15]] monotonic, align 4 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP14]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]] -// CHECK5: omp.arraycpy.done14: -// CHECK5-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr i32, i32* [[LHS_BEGIN]], i64 99 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY5:%.*]] = icmp eq i32* [[LHS_BEGIN]], [[TMP17]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY5]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY6:%.*]] +// CHECK5: omp.arraycpy.body6: +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST7:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi i32* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST7]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = atomicrmw add i32* [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 [[TMP18]] monotonic, align 4 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT10]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST7]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP17]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY6]] +// CHECK5: omp.arraycpy.done12: +// CHECK5-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: // CHECK5-NEXT: ret void @@ -2421,75 +2542,81 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca i128*, align 8 // CHECK5-NEXT: [[Z_ADDR:%.*]] = alloca i128*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK5-NEXT: store i128* [[Y]], i128** [[Y_ADDR]], align 8 // CHECK5-NEXT: store i128* [[Z]], i128** [[Z_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load i128*, i128** [[Y_ADDR]], align 8 // CHECK5-NEXT: [[TMP1:%.*]] = load i128*, i128** [[Z_ADDR]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i128*, i128*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i128* [[TMP0]], i128* [[TMP1]]) +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i128* [[TMP0]], i128** [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i128* [[TMP1]], i128** [[TMP3]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i128* noundef nonnull align 16 dereferenceable(16) [[Y:%.*]], i128* noundef nonnull align 16 dereferenceable(16) [[Z:%.*]]) #[[ATTR0]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca i128*, align 8 -// CHECK5-NEXT: [[Z_ADDR:%.*]] = alloca i128*, align 8 -// CHECK5-NEXT: [[Y1:%.*]] = alloca i128, align 16 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 +// CHECK5-NEXT: [[Y:%.*]] = alloca i128, align 16 // CHECK5-NEXT: [[X:%.*]] = alloca i128, align 16 -// CHECK5-NEXT: [[Z2:%.*]] = alloca i128, align 16 +// CHECK5-NEXT: [[Z:%.*]] = alloca i128, align 16 // CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK5-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i128, align 16 -// CHECK5-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i128, align 16 +// CHECK5-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i128, align 16 // CHECK5-NEXT: [[TMP:%.*]] = alloca i128, align 16 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i128* [[Y]], i128** [[Y_ADDR]], align 8 -// CHECK5-NEXT: store i128* [[Z]], i128** [[Z_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i128*, i128** [[Y_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i128*, i128** [[Z_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i128, i128* [[TMP0]], align 16 -// CHECK5-NEXT: store i128 [[TMP2]], i128* [[Y1]], align 16 -// CHECK5-NEXT: store i128 0, i128* [[Z2]], align 16 -// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i128* [[Z2]] to i8* -// CHECK5-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK5-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 1, i64 8, i8* [[TMP7]], void (i8*, i8*)* @.omp.reduction.reduction_func.11, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK5-NEXT: switch i32 [[TMP8]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i128*, i128** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i128*, i128** [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = load i128, i128* [[TMP2]], align 16 +// CHECK5-NEXT: store i128 [[TMP5]], i128* [[Y]], align 16 +// CHECK5-NEXT: store i128 0, i128* [[Z]], align 16 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP7:%.*]] = bitcast i128* [[Z]] to i8* +// CHECK5-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK5-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 1, i64 8, i8* [[TMP10]], void (i8*, i8*)* @.omp.reduction.reduction_func.11, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP11]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP9:%.*]] = load i128, i128* [[TMP1]], align 16 -// CHECK5-NEXT: [[TMP10:%.*]] = load i128, i128* [[Z2]], align 16 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP9]], [[TMP10]] -// CHECK5-NEXT: store i128 [[ADD]], i128* [[TMP1]], align 16 -// CHECK5-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: [[TMP12:%.*]] = load i128, i128* [[TMP4]], align 16 +// CHECK5-NEXT: [[TMP13:%.*]] = load i128, i128* [[Z]], align 16 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: store i128 [[ADD]], i128* [[TMP4]], align 16 +// CHECK5-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP11:%.*]] = load i128, i128* [[Z2]], align 16 -// CHECK5-NEXT: [[TMP12:%.*]] = bitcast i128* [[TMP1]] to i8* -// CHECK5-NEXT: [[TMP13:%.*]] = bitcast i128* [[ATOMIC_TEMP]] to i8* -// CHECK5-NEXT: call void @__atomic_load(i64 noundef 16, i8* noundef [[TMP12]], i8* noundef [[TMP13]], i32 noundef signext 0) #[[ATTR6:[0-9]+]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i128, i128* [[Z]], align 16 +// CHECK5-NEXT: [[TMP15:%.*]] = bitcast i128* [[TMP4]] to i8* +// CHECK5-NEXT: [[TMP16:%.*]] = bitcast i128* [[ATOMIC_TEMP]] to i8* +// CHECK5-NEXT: call void @__atomic_load(i64 noundef 16, i8* noundef [[TMP15]], i8* noundef [[TMP16]], i32 noundef signext 0) #[[ATTR6:[0-9]+]] // CHECK5-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK5: atomic_cont: -// CHECK5-NEXT: [[TMP14:%.*]] = load i128, i128* [[ATOMIC_TEMP]], align 16 -// CHECK5-NEXT: store i128 [[TMP14]], i128* [[TMP]], align 16 -// CHECK5-NEXT: [[TMP15:%.*]] = load i128, i128* [[TMP]], align 16 -// CHECK5-NEXT: [[TMP16:%.*]] = load i128, i128* [[Z2]], align 16 -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i128 [[TMP15]], [[TMP16]] -// CHECK5-NEXT: store i128 [[ADD4]], i128* [[ATOMIC_TEMP3]], align 16 -// CHECK5-NEXT: [[TMP17:%.*]] = bitcast i128* [[TMP1]] to i8* -// CHECK5-NEXT: [[TMP18:%.*]] = bitcast i128* [[ATOMIC_TEMP]] to i8* -// CHECK5-NEXT: [[TMP19:%.*]] = bitcast i128* [[ATOMIC_TEMP3]] to i8* -// CHECK5-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 16, i8* noundef [[TMP17]], i8* noundef [[TMP18]], i8* noundef [[TMP19]], i32 noundef signext 0, i32 noundef signext 0) #[[ATTR6]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i128, i128* [[ATOMIC_TEMP]], align 16 +// CHECK5-NEXT: store i128 [[TMP17]], i128* [[TMP]], align 16 +// CHECK5-NEXT: [[TMP18:%.*]] = load i128, i128* [[TMP]], align 16 +// CHECK5-NEXT: [[TMP19:%.*]] = load i128, i128* [[Z]], align 16 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i128 [[TMP18]], [[TMP19]] +// CHECK5-NEXT: store i128 [[ADD2]], i128* [[ATOMIC_TEMP1]], align 16 +// CHECK5-NEXT: [[TMP20:%.*]] = bitcast i128* [[TMP4]] to i8* +// CHECK5-NEXT: [[TMP21:%.*]] = bitcast i128* [[ATOMIC_TEMP]] to i8* +// CHECK5-NEXT: [[TMP22:%.*]] = bitcast i128* [[ATOMIC_TEMP1]] to i8* +// CHECK5-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 16, i8* noundef [[TMP20]], i8* noundef [[TMP21]], i8* noundef [[TMP22]], i32 noundef signext 0, i32 noundef signext 0) #[[ATTR6]] // CHECK5-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK5: atomic_exit: -// CHECK5-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: // CHECK5-NEXT: ret void @@ -2524,75 +2651,81 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca i128*, align 8 // CHECK5-NEXT: [[Z_ADDR:%.*]] = alloca i128*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK5-NEXT: store i128* [[Y]], i128** [[Y_ADDR]], align 8 // CHECK5-NEXT: store i128* [[Z]], i128** [[Z_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load i128*, i128** [[Y_ADDR]], align 8 // CHECK5-NEXT: [[TMP1:%.*]] = load i128*, i128** [[Z_ADDR]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i128*, i128*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i128* [[TMP0]], i128* [[TMP1]]) +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i128* [[TMP0]], i128** [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i128* [[TMP1]], i128** [[TMP3]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i128* noundef nonnull align 16 dereferenceable(16) [[Y:%.*]], i128* noundef nonnull align 16 dereferenceable(16) [[Z:%.*]]) #[[ATTR0]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca i128*, align 8 -// CHECK5-NEXT: [[Z_ADDR:%.*]] = alloca i128*, align 8 -// CHECK5-NEXT: [[Y1:%.*]] = alloca i128, align 16 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK5-NEXT: [[Y:%.*]] = alloca i128, align 16 // CHECK5-NEXT: [[X:%.*]] = alloca i128, align 16 -// CHECK5-NEXT: [[Z2:%.*]] = alloca i128, align 16 +// CHECK5-NEXT: [[Z:%.*]] = alloca i128, align 16 // CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK5-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i128, align 16 -// CHECK5-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i128, align 16 +// CHECK5-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i128, align 16 // CHECK5-NEXT: [[TMP:%.*]] = alloca i128, align 16 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i128* [[Y]], i128** [[Y_ADDR]], align 8 -// CHECK5-NEXT: store i128* [[Z]], i128** [[Z_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i128*, i128** [[Y_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i128*, i128** [[Z_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i128, i128* [[TMP0]], align 16 -// CHECK5-NEXT: store i128 [[TMP2]], i128* [[Y1]], align 16 -// CHECK5-NEXT: store i128 0, i128* [[Z2]], align 16 -// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i128* [[Z2]] to i8* -// CHECK5-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK5-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 1, i64 8, i8* [[TMP7]], void (i8*, i8*)* @.omp.reduction.reduction_func.13, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK5-NEXT: switch i32 [[TMP8]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i128*, i128** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i128*, i128** [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = load i128, i128* [[TMP2]], align 16 +// CHECK5-NEXT: store i128 [[TMP5]], i128* [[Y]], align 16 +// CHECK5-NEXT: store i128 0, i128* [[Z]], align 16 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP7:%.*]] = bitcast i128* [[Z]] to i8* +// CHECK5-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK5-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 1, i64 8, i8* [[TMP10]], void (i8*, i8*)* @.omp.reduction.reduction_func.13, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP11]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP9:%.*]] = load i128, i128* [[TMP1]], align 16 -// CHECK5-NEXT: [[TMP10:%.*]] = load i128, i128* [[Z2]], align 16 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP9]], [[TMP10]] -// CHECK5-NEXT: store i128 [[ADD]], i128* [[TMP1]], align 16 -// CHECK5-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: [[TMP12:%.*]] = load i128, i128* [[TMP4]], align 16 +// CHECK5-NEXT: [[TMP13:%.*]] = load i128, i128* [[Z]], align 16 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: store i128 [[ADD]], i128* [[TMP4]], align 16 +// CHECK5-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP11:%.*]] = load i128, i128* [[Z2]], align 16 -// CHECK5-NEXT: [[TMP12:%.*]] = bitcast i128* [[TMP1]] to i8* -// CHECK5-NEXT: [[TMP13:%.*]] = bitcast i128* [[ATOMIC_TEMP]] to i8* -// CHECK5-NEXT: call void @__atomic_load(i64 noundef 16, i8* noundef [[TMP12]], i8* noundef [[TMP13]], i32 noundef signext 0) #[[ATTR6]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i128, i128* [[Z]], align 16 +// CHECK5-NEXT: [[TMP15:%.*]] = bitcast i128* [[TMP4]] to i8* +// CHECK5-NEXT: [[TMP16:%.*]] = bitcast i128* [[ATOMIC_TEMP]] to i8* +// CHECK5-NEXT: call void @__atomic_load(i64 noundef 16, i8* noundef [[TMP15]], i8* noundef [[TMP16]], i32 noundef signext 0) #[[ATTR6]] // CHECK5-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK5: atomic_cont: -// CHECK5-NEXT: [[TMP14:%.*]] = load i128, i128* [[ATOMIC_TEMP]], align 16 -// CHECK5-NEXT: store i128 [[TMP14]], i128* [[TMP]], align 16 -// CHECK5-NEXT: [[TMP15:%.*]] = load i128, i128* [[TMP]], align 16 -// CHECK5-NEXT: [[TMP16:%.*]] = load i128, i128* [[Z2]], align 16 -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i128 [[TMP15]], [[TMP16]] -// CHECK5-NEXT: store i128 [[ADD4]], i128* [[ATOMIC_TEMP3]], align 16 -// CHECK5-NEXT: [[TMP17:%.*]] = bitcast i128* [[TMP1]] to i8* -// CHECK5-NEXT: [[TMP18:%.*]] = bitcast i128* [[ATOMIC_TEMP]] to i8* -// CHECK5-NEXT: [[TMP19:%.*]] = bitcast i128* [[ATOMIC_TEMP3]] to i8* -// CHECK5-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 16, i8* noundef [[TMP17]], i8* noundef [[TMP18]], i8* noundef [[TMP19]], i32 noundef signext 0, i32 noundef signext 0) #[[ATTR6]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i128, i128* [[ATOMIC_TEMP]], align 16 +// CHECK5-NEXT: store i128 [[TMP17]], i128* [[TMP]], align 16 +// CHECK5-NEXT: [[TMP18:%.*]] = load i128, i128* [[TMP]], align 16 +// CHECK5-NEXT: [[TMP19:%.*]] = load i128, i128* [[Z]], align 16 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i128 [[TMP18]], [[TMP19]] +// CHECK5-NEXT: store i128 [[ADD2]], i128* [[ATOMIC_TEMP1]], align 16 +// CHECK5-NEXT: [[TMP20:%.*]] = bitcast i128* [[TMP4]] to i8* +// CHECK5-NEXT: [[TMP21:%.*]] = bitcast i128* [[ATOMIC_TEMP]] to i8* +// CHECK5-NEXT: [[TMP22:%.*]] = bitcast i128* [[ATOMIC_TEMP1]] to i8* +// CHECK5-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 16, i8* noundef [[TMP20]], i8* noundef [[TMP21]], i8* noundef [[TMP22]], i32 noundef signext 0, i32 noundef signext 0) #[[ATTR6]] // CHECK5-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK5: atomic_exit: -// CHECK5-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: // CHECK5-NEXT: ret void @@ -2625,19 +2758,23 @@ // CHECK7-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14mapWithPrivatev_l27 // CHECK7-SAME: () #[[ATTR0:[0-9]+]] { // CHECK7-NEXT: entry: -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK7-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK7-NEXT: ret void // // @@ -2646,33 +2783,39 @@ // CHECK7-NEXT: entry: // CHECK7-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[Y_ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[X_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[Y_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK7-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 4 // CHECK7-NEXT: store i32* [[Y]], i32** [[Y_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load i32*, i32** [[X_ADDR]], align 4 // CHECK7-NEXT: [[TMP1:%.*]] = load i32*, i32** [[Y_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], i32* [[X_CASTED]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[X_CASTED]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], i32* [[Y_CASTED]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[Y_CASTED]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP5]]) +// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK7-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR0]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[Y_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK7-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[X]], i32* [[X_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[Y]], i32* [[Y_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[X]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], i32* [[Y]], align 4 // CHECK7-NEXT: ret void // // @@ -2681,63 +2824,69 @@ // CHECK7-NEXT: entry: // CHECK7-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[Y_ADDR:%.*]] = alloca i32*, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK7-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 4 // CHECK7-NEXT: store i32* [[Y]], i32** [[Y_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load i32*, i32** [[X_ADDR]], align 4 // CHECK7-NEXT: [[TMP1:%.*]] = load i32*, i32** [[Y_ADDR]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[TMP0]], i32* [[TMP1]]) +// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store i32* [[TMP0]], i32** [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store i32* [[TMP1]], i32** [[TMP3]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[X:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[Y:%.*]]) #[[ATTR0]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[Y_ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[X1:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[Y2:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK7-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 4 -// CHECK7-NEXT: store i32* [[Y]], i32** [[Y_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load i32*, i32** [[X_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32*, i32** [[Y_ADDR]], align 4 -// CHECK7-NEXT: store i32 0, i32* [[X1]], align 4 -// CHECK7-NEXT: store i32 0, i32* [[Y2]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP3:%.*]] = bitcast i32* [[X1]] to i8* -// CHECK7-NEXT: store i8* [[TMP3]], i8** [[TMP2]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK7-NEXT: [[TMP5:%.*]] = bitcast i32* [[Y2]] to i8* -// CHECK7-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK7-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 2, i32 8, i8* [[TMP8]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK7-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK7-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK7-NEXT: store i32 0, i32* [[X]], align 4 +// CHECK7-NEXT: store i32 0, i32* [[Y]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP6:%.*]] = bitcast i32* [[X]] to i8* +// CHECK7-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP8:%.*]] = bitcast i32* [[Y]] to i8* +// CHECK7-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK7-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 2, i32 8, i8* [[TMP11]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK7-NEXT: switch i32 [[TMP12]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK7-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK7-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK7-NEXT: ] // CHECK7: .omp.reduction.case1: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[X1]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[Y2]], align 4 -// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK7-NEXT: store i32 [[ADD3]], i32* [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[X]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK7-NEXT: store i32 [[ADD]], i32* [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[Y]], align 4 +// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[TMP4]], align 4 +// CHECK7-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK7-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK7: .omp.reduction.case2: -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[X1]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP14]] monotonic, align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[Y2]], align 4 -// CHECK7-NEXT: [[TMP17:%.*]] = atomicrmw add i32* [[TMP1]], i32 [[TMP16]] monotonic, align 4 -// CHECK7-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[X]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP17]] monotonic, align 4 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[Y]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = atomicrmw add i32* [[TMP4]], i32 [[TMP19]] monotonic, align 4 +// CHECK7-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK7-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK7: .omp.reduction.default: // CHECK7-NEXT: ret void @@ -2781,25 +2930,30 @@ // CHECK7-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[X_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK7-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load i32*, i32** [[X_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], i32* [[X_CASTED]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[X_CASTED]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP2]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[X:%.*]]) #[[ATTR0]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK7-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[X]], i32* [[X_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[X]], align 4 // CHECK7-NEXT: ret void // // @@ -2807,25 +2961,30 @@ // CHECK7-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[X_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK7-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load i32*, i32** [[X_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], i32* [[X_CASTED]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[X_CASTED]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32 [[TMP2]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[X:%.*]]) #[[ATTR0]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK7-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[X]], i32* [[X_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[X]], align 4 // CHECK7-NEXT: ret void // // @@ -2833,25 +2992,30 @@ // CHECK7-SAME: (i32* noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[X_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK7-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load i32*, i32** [[X_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], i32* [[X_CASTED]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[X_CASTED]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP2]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[X:%.*]]) #[[ATTR0]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK7-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[X]], i32* [[X_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[X]], align 4 // CHECK7-NEXT: ret void // // @@ -2860,91 +3024,97 @@ // CHECK7-NEXT: entry: // CHECK7-NEXT: [[Y_ADDR:%.*]] = alloca [88 x i32]*, align 4 // CHECK7-NEXT: [[Z_ADDR:%.*]] = alloca [99 x i32]*, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK7-NEXT: store [88 x i32]* [[Y]], [88 x i32]** [[Y_ADDR]], align 4 // CHECK7-NEXT: store [99 x i32]* [[Z]], [99 x i32]** [[Z_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load [88 x i32]*, [88 x i32]** [[Y_ADDR]], align 4 // CHECK7-NEXT: [[TMP1:%.*]] = load [99 x i32]*, [99 x i32]** [[Z_ADDR]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [88 x i32]*, [99 x i32]*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), [88 x i32]* [[TMP0]], [99 x i32]* [[TMP1]]) +// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store [88 x i32]* [[TMP0]], [88 x i32]** [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store [99 x i32]* [[TMP1]], [99 x i32]** [[TMP3]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [88 x i32]* noundef nonnull align 4 dereferenceable(352) [[Y:%.*]], [99 x i32]* noundef nonnull align 4 dereferenceable(396) [[Z:%.*]]) #[[ATTR0]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[Y_ADDR:%.*]] = alloca [88 x i32]*, align 4 -// CHECK7-NEXT: [[Z_ADDR:%.*]] = alloca [99 x i32]*, align 4 -// CHECK7-NEXT: [[Y1:%.*]] = alloca [88 x i32], align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 +// CHECK7-NEXT: [[Y:%.*]] = alloca [88 x i32], align 4 // CHECK7-NEXT: [[X:%.*]] = alloca [77 x i32], align 4 -// CHECK7-NEXT: [[Z2:%.*]] = alloca [99 x i32], align 4 +// CHECK7-NEXT: [[Z:%.*]] = alloca [99 x i32], align 4 // CHECK7-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store [88 x i32]* [[Y]], [88 x i32]** [[Y_ADDR]], align 4 -// CHECK7-NEXT: store [99 x i32]* [[Z]], [99 x i32]** [[Z_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load [88 x i32]*, [88 x i32]** [[Y_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load [99 x i32]*, [99 x i32]** [[Z_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = bitcast [88 x i32]* [[Y1]] to i8* -// CHECK7-NEXT: [[TMP3:%.*]] = bitcast [88 x i32]* [[TMP0]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP2]], i8* align 4 [[TMP3]], i32 352, i1 false) -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], [99 x i32]* [[Z2]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr i32, i32* [[ARRAY_BEGIN]], i32 99 -// CHECK7-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[ARRAY_BEGIN]], [[TMP4]] +// CHECK7-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load [88 x i32]*, [88 x i32]** [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load [99 x i32]*, [99 x i32]** [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = bitcast [88 x i32]* [[Y]] to i8* +// CHECK7-NEXT: [[TMP6:%.*]] = bitcast [88 x i32]* [[TMP2]] to i8* +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i32 352, i1 false) +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], [99 x i32]* [[Z]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[ARRAY_BEGIN]], i32 99 +// CHECK7-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[ARRAY_BEGIN]], [[TMP7]] // CHECK7-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK7: omp.arrayinit.body: // CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i32* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK7-NEXT: store i32 0, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] // CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK7: omp.arrayinit.done: -// CHECK7-NEXT: [[LHS_BEGIN:%.*]] = bitcast [99 x i32]* [[TMP1]] to i32* -// CHECK7-NEXT: [[RHS_BEGIN:%.*]] = bitcast [99 x i32]* [[Z2]] to i32* -// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP6:%.*]] = bitcast i32* [[RHS_BEGIN]] to i8* -// CHECK7-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK7-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 1, i32 4, i8* [[TMP9]], void (i8*, i8*)* @.omp.reduction.reduction_func.7, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK7-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK7-NEXT: [[LHS_BEGIN:%.*]] = bitcast [99 x i32]* [[TMP4]] to i32* +// CHECK7-NEXT: [[RHS_BEGIN:%.*]] = bitcast [99 x i32]* [[Z]] to i32* +// CHECK7-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP9:%.*]] = bitcast i32* [[RHS_BEGIN]] to i8* +// CHECK7-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK7-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 1, i32 4, i8* [[TMP12]], void (i8*, i8*)* @.omp.reduction.reduction_func.7, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK7-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK7-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK7-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK7-NEXT: ] // CHECK7: .omp.reduction.case1: -// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[LHS_BEGIN]], i32 99 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[LHS_BEGIN]], [[TMP11]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[LHS_BEGIN]], i32 99 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[LHS_BEGIN]], [[TMP14]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: // CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi i32* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST1:%.*]] = phi i32* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT2:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK7-NEXT: store i32 [[ADD]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT2]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST1]], i32 1 // CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP11]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK7: omp.arraycpy.done6: -// CHECK7-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE3:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT2]], [[TMP14]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] +// CHECK7: omp.arraycpy.done4: +// CHECK7-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK7-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK7: .omp.reduction.case2: -// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[LHS_BEGIN]], i32 99 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq i32* [[LHS_BEGIN]], [[TMP14]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]] -// CHECK7: omp.arraycpy.body8: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi i32* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = atomicrmw add i32* [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP15]] monotonic, align 4 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP14]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]] -// CHECK7: omp.arraycpy.done14: -// CHECK7-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK7-NEXT: [[TMP17:%.*]] = getelementptr i32, i32* [[LHS_BEGIN]], i32 99 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY5:%.*]] = icmp eq i32* [[LHS_BEGIN]], [[TMP17]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY5]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY6:%.*]] +// CHECK7: omp.arraycpy.body6: +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST7:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi i32* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST7]], align 4 +// CHECK7-NEXT: [[TMP19:%.*]] = atomicrmw add i32* [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 [[TMP18]] monotonic, align 4 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT10]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST7]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP17]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY6]] +// CHECK7: omp.arraycpy.done12: +// CHECK7-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK7-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK7: .omp.reduction.default: // CHECK7-NEXT: ret void @@ -2990,91 +3160,97 @@ // CHECK7-NEXT: entry: // CHECK7-NEXT: [[Y_ADDR:%.*]] = alloca [88 x i32]*, align 4 // CHECK7-NEXT: [[Z_ADDR:%.*]] = alloca [99 x i32]*, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK7-NEXT: store [88 x i32]* [[Y]], [88 x i32]** [[Y_ADDR]], align 4 // CHECK7-NEXT: store [99 x i32]* [[Z]], [99 x i32]** [[Z_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load [88 x i32]*, [88 x i32]** [[Y_ADDR]], align 4 // CHECK7-NEXT: [[TMP1:%.*]] = load [99 x i32]*, [99 x i32]** [[Z_ADDR]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [88 x i32]*, [99 x i32]*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), [88 x i32]* [[TMP0]], [99 x i32]* [[TMP1]]) +// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store [88 x i32]* [[TMP0]], [88 x i32]** [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store [99 x i32]* [[TMP1]], [99 x i32]** [[TMP3]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [88 x i32]* noundef nonnull align 4 dereferenceable(352) [[Y:%.*]], [99 x i32]* noundef nonnull align 4 dereferenceable(396) [[Z:%.*]]) #[[ATTR0]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[Y_ADDR:%.*]] = alloca [88 x i32]*, align 4 -// CHECK7-NEXT: [[Z_ADDR:%.*]] = alloca [99 x i32]*, align 4 -// CHECK7-NEXT: [[Y1:%.*]] = alloca [88 x i32], align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 +// CHECK7-NEXT: [[Y:%.*]] = alloca [88 x i32], align 4 // CHECK7-NEXT: [[X:%.*]] = alloca [77 x i32], align 4 -// CHECK7-NEXT: [[Z2:%.*]] = alloca [99 x i32], align 4 +// CHECK7-NEXT: [[Z:%.*]] = alloca [99 x i32], align 4 // CHECK7-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store [88 x i32]* [[Y]], [88 x i32]** [[Y_ADDR]], align 4 -// CHECK7-NEXT: store [99 x i32]* [[Z]], [99 x i32]** [[Z_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load [88 x i32]*, [88 x i32]** [[Y_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load [99 x i32]*, [99 x i32]** [[Z_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = bitcast [88 x i32]* [[Y1]] to i8* -// CHECK7-NEXT: [[TMP3:%.*]] = bitcast [88 x i32]* [[TMP0]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP2]], i8* align 4 [[TMP3]], i32 352, i1 false) -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], [99 x i32]* [[Z2]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr i32, i32* [[ARRAY_BEGIN]], i32 99 -// CHECK7-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[ARRAY_BEGIN]], [[TMP4]] +// CHECK7-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load [88 x i32]*, [88 x i32]** [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load [99 x i32]*, [99 x i32]** [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = bitcast [88 x i32]* [[Y]] to i8* +// CHECK7-NEXT: [[TMP6:%.*]] = bitcast [88 x i32]* [[TMP2]] to i8* +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i32 352, i1 false) +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], [99 x i32]* [[Z]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[ARRAY_BEGIN]], i32 99 +// CHECK7-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[ARRAY_BEGIN]], [[TMP7]] // CHECK7-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK7: omp.arrayinit.body: // CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i32* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK7-NEXT: store i32 0, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] // CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK7: omp.arrayinit.done: -// CHECK7-NEXT: [[LHS_BEGIN:%.*]] = bitcast [99 x i32]* [[TMP1]] to i32* -// CHECK7-NEXT: [[RHS_BEGIN:%.*]] = bitcast [99 x i32]* [[Z2]] to i32* -// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP6:%.*]] = bitcast i32* [[RHS_BEGIN]] to i8* -// CHECK7-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK7-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 1, i32 4, i8* [[TMP9]], void (i8*, i8*)* @.omp.reduction.reduction_func.9, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK7-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK7-NEXT: [[LHS_BEGIN:%.*]] = bitcast [99 x i32]* [[TMP4]] to i32* +// CHECK7-NEXT: [[RHS_BEGIN:%.*]] = bitcast [99 x i32]* [[Z]] to i32* +// CHECK7-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP9:%.*]] = bitcast i32* [[RHS_BEGIN]] to i8* +// CHECK7-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK7-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 1, i32 4, i8* [[TMP12]], void (i8*, i8*)* @.omp.reduction.reduction_func.9, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK7-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK7-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK7-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK7-NEXT: ] // CHECK7: .omp.reduction.case1: -// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[LHS_BEGIN]], i32 99 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[LHS_BEGIN]], [[TMP11]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[LHS_BEGIN]], i32 99 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[LHS_BEGIN]], [[TMP14]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: // CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi i32* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST1:%.*]] = phi i32* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT2:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK7-NEXT: store i32 [[ADD]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT2]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST1]], i32 1 // CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP11]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK7: omp.arraycpy.done6: -// CHECK7-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE3:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT2]], [[TMP14]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] +// CHECK7: omp.arraycpy.done4: +// CHECK7-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK7-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK7: .omp.reduction.case2: -// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[LHS_BEGIN]], i32 99 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq i32* [[LHS_BEGIN]], [[TMP14]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]] -// CHECK7: omp.arraycpy.body8: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi i32* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = atomicrmw add i32* [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP15]] monotonic, align 4 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP14]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]] -// CHECK7: omp.arraycpy.done14: -// CHECK7-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK7-NEXT: [[TMP17:%.*]] = getelementptr i32, i32* [[LHS_BEGIN]], i32 99 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY5:%.*]] = icmp eq i32* [[LHS_BEGIN]], [[TMP17]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY5]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY6:%.*]] +// CHECK7: omp.arraycpy.body6: +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST7:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi i32* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST7]], align 4 +// CHECK7-NEXT: [[TMP19:%.*]] = atomicrmw add i32* [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 [[TMP18]] monotonic, align 4 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT10]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST7]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP17]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY6]] +// CHECK7: omp.arraycpy.done12: +// CHECK7-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK7-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK7: .omp.reduction.default: // CHECK7-NEXT: ret void diff --git a/clang/test/OpenMP/target_teams_num_teams_codegen.cpp b/clang/test/OpenMP/target_teams_num_teams_codegen.cpp --- a/clang/test/OpenMP/target_teams_num_teams_codegen.cpp +++ b/clang/test/OpenMP/target_teams_num_teams_codegen.cpp @@ -442,7 +442,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -452,31 +452,35 @@ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]], i64 [[TMP4]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double -// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double +// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double [[ADD]], double* [[A]], align 8 // CHECK1-NEXT: ret void // @@ -485,25 +489,30 @@ // CHECK1-SAME: (%struct.S1* noundef [[THIS:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1024, i32 0) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP2]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 8 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double 2.500000e+00, double* [[A]], align 8 // CHECK1-NEXT: ret void // @@ -512,22 +521,26 @@ // CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -535,41 +548,49 @@ // CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 20, i32 0) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -579,8 +600,7 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -591,36 +611,39 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK1-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP4]], i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK1-NEXT: store i16 [[TMP6]], i16* [[TMP5]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], i16* [[B]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[B]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[A]], align 4 // CHECK1-NEXT: ret void // // @@ -894,7 +917,7 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -902,29 +925,35 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double [[ADD]], double* [[A]], align 4 // CHECK3-NEXT: ret void // @@ -933,25 +962,30 @@ // CHECK3-SAME: (%struct.S1* noundef [[THIS:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1024, i32 0) -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP2]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 4 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double 2.500000e+00, double* [[A]], align 4 // CHECK3-NEXT: ret void // @@ -960,21 +994,25 @@ // CHECK3-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -982,40 +1020,48 @@ // CHECK3-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK3-SAME: () #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 20, i32 0) -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -1025,8 +1071,7 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -1036,34 +1081,39 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP4]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK3-NEXT: store i16 [[TMP6]], i16* [[TMP5]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], i16* [[B]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, i16* [[B]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[A]], align 4 // CHECK3-NEXT: ret void // // @@ -1078,22 +1128,26 @@ // CHECK9-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1101,22 +1155,26 @@ // CHECK9-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1126,7 +1184,7 @@ // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -1136,31 +1194,35 @@ // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]], i64 [[TMP4]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK9-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double -// CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double +// CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK9-NEXT: store double [[ADD]], double* [[A]], align 8 // CHECK9-NEXT: ret void // @@ -1169,25 +1231,30 @@ // CHECK9-SAME: (%struct.S1* noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1024, i32 0) -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP2]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 8 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK9-NEXT: store double 2.500000e+00, double* [[A]], align 8 // CHECK9-NEXT: ret void // @@ -1195,19 +1262,23 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK9-SAME: () #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 20, i32 0) -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1217,8 +1288,7 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -1229,36 +1299,39 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK9-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* -// CHECK9-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP4]], i64 [[TMP6]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK9-NEXT: store i16 [[TMP6]], i16* [[TMP5]], align 4 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK9-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], i16* [[B]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = load i16, i16* [[B]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[A]], align 4 // CHECK9-NEXT: ret void // // @@ -1266,21 +1339,25 @@ // CHECK11-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -1288,21 +1365,25 @@ // CHECK11-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -1312,7 +1393,7 @@ // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK11-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -1320,29 +1401,35 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK11-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK11-NEXT: store double [[ADD]], double* [[A]], align 4 // CHECK11-NEXT: ret void // @@ -1351,25 +1438,30 @@ // CHECK11-SAME: (%struct.S1* noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK11-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1024, i32 0) -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP2]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 4 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK11-NEXT: store double 2.500000e+00, double* [[A]], align 4 // CHECK11-NEXT: ret void // @@ -1377,19 +1469,23 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK11-SAME: () #[[ATTR3:[0-9]+]] { // CHECK11-NEXT: entry: +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK11-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 20, i32 0) -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -1399,8 +1495,7 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -1410,33 +1505,38 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK11-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK11-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* -// CHECK11-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP4]], i32 [[TMP6]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK11-NEXT: store i16 [[TMP6]], i16* [[TMP5]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], i16* [[B]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = load i16, i16* [[B]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[A]], align 4 // CHECK11-NEXT: ret void // diff --git a/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp b/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp --- a/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp +++ b/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp @@ -460,7 +460,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -470,31 +470,35 @@ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]], i64 [[TMP4]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double -// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double +// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double [[ADD]], double* [[A]], align 8 // CHECK1-NEXT: ret void // @@ -503,25 +507,30 @@ // CHECK1-SAME: (%struct.S1* noundef [[THIS:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 1024) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP2]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 8 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double 2.500000e+00, double* [[A]], align 8 // CHECK1-NEXT: ret void // @@ -531,6 +540,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], i64* [[DOTCAPTURE_EXPR__ADDR2]], align 8 @@ -539,17 +549,20 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -557,41 +570,49 @@ // CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 20) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -601,8 +622,7 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -613,36 +633,39 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK1-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* -// CHECK1-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP4]], i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK1-NEXT: store i16 [[TMP6]], i16* [[TMP5]], align 4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], i16* [[B]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[B]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[A]], align 4 // CHECK1-NEXT: ret void // // @@ -933,7 +956,7 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -941,29 +964,35 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double [[ADD]], double* [[A]], align 4 // CHECK3-NEXT: ret void // @@ -972,25 +1001,30 @@ // CHECK3-SAME: (%struct.S1* noundef [[THIS:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 1024) -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP2]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 4 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double 2.500000e+00, double* [[A]], align 4 // CHECK3-NEXT: ret void // @@ -1000,23 +1034,27 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_1]], i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -1024,40 +1062,48 @@ // CHECK3-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK3-SAME: () #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 20) -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -1067,8 +1113,7 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -1078,34 +1123,39 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* -// CHECK3-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP4]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK3-NEXT: store i16 [[TMP6]], i16* [[TMP5]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], i16* [[B]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, i16* [[B]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[A]], align 4 // CHECK3-NEXT: ret void // // @@ -1121,6 +1171,7 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], i64* [[DOTCAPTURE_EXPR__ADDR2]], align 8 @@ -1129,17 +1180,20 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1147,22 +1201,26 @@ // CHECK9-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1172,7 +1230,7 @@ // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -1182,31 +1240,35 @@ // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]], i64 [[TMP4]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK9-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double -// CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double +// CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK9-NEXT: store double [[ADD]], double* [[A]], align 8 // CHECK9-NEXT: ret void // @@ -1215,25 +1277,30 @@ // CHECK9-SAME: (%struct.S1* noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 1024) -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP2]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 8 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK9-NEXT: store double 2.500000e+00, double* [[A]], align 8 // CHECK9-NEXT: ret void // @@ -1241,19 +1308,23 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK9-SAME: () #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 20) -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1263,8 +1334,7 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -1275,36 +1345,39 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK9-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* -// CHECK9-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP4]], i64 [[TMP6]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load i16, i16* [[CONV1]], align 2 +// CHECK9-NEXT: store i16 [[TMP6]], i16* [[TMP5]], align 4 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 -// CHECK9-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK9-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], i16* [[B]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = load i16, i16* [[B]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[A]], align 4 // CHECK9-NEXT: ret void // // @@ -1313,23 +1386,27 @@ // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_1]], i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -1337,21 +1414,25 @@ // CHECK11-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -1361,7 +1442,7 @@ // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK11-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -1369,29 +1450,35 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[B_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[B]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK11-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK11-NEXT: store double [[ADD]], double* [[A]], align 4 // CHECK11-NEXT: ret void // @@ -1400,25 +1487,30 @@ // CHECK11-SAME: (%struct.S1* noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK11-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 1024) -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store %struct.S1* [[TMP1]], %struct.S1** [[TMP2]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S1*, %struct.S1** [[TMP1]], align 4 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0 // CHECK11-NEXT: store double 2.500000e+00, double* [[A]], align 4 // CHECK11-NEXT: ret void // @@ -1426,19 +1518,23 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK11-SAME: () #[[ATTR3:[0-9]+]] { // CHECK11-NEXT: entry: +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK11-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 20) -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -1448,8 +1544,7 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -1459,33 +1554,38 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK11-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK11-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* -// CHECK11-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP4]], i32 [[TMP6]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK11-NEXT: store i16 [[TMP6]], i16* [[TMP5]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 -// CHECK11-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], i16* [[B]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = load i16, i16* [[B]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[A]], align 4 // CHECK11-NEXT: ret void // diff --git a/clang/test/OpenMP/task_codegen.cpp b/clang/test/OpenMP/task_codegen.cpp --- a/clang/test/OpenMP/task_codegen.cpp +++ b/clang/test/OpenMP/task_codegen.cpp @@ -556,8 +556,8 @@ // CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: store i32 15, i32* @a, align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* @a, align 4 +// CHECK1-NEXT: store i32 15, i32* @a, align 4, !noalias !12 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* @a, align 4, !noalias !12 // CHECK1-NEXT: [[CONV_I:%.*]] = trunc i32 [[TMP11]] to i8 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP10]], i32 0, i32 0 // CHECK1-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 8 @@ -602,7 +602,7 @@ // CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !22 // CHECK1-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !22 // CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !22 -// CHECK1-NEXT: store i32 15, i32* @a, align 4 +// CHECK1-NEXT: store i32 15, i32* @a, align 4, !noalias !22 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP10]], i32 0, i32 0 // CHECK1-NEXT: [[TMP12:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP11]], align 8 // CHECK1-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP12]], i64 0, i64 1 @@ -663,7 +663,7 @@ // CHECK1: .untied.jmp.1.i: // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !32 // CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR4]] -// CHECK1-NEXT: store i32 1, i32* @a, align 4 +// CHECK1-NEXT: store i32 1, i32* @a, align 4, !noalias !32 // CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR4]] // CHECK1-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !32 // CHECK1-NEXT: br label [[CLEANUP_I]] @@ -724,7 +724,7 @@ // CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i8* [[TMP15]]) #[[ATTR4]] // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__5_EXIT:%.*]] // CHECK1: .untied.jmp.1.i: -// CHECK1-NEXT: store i32 1, i32* @a, align 4 +// CHECK1-NEXT: store i32 1, i32* @a, align 4, !noalias !42 // CHECK1-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !42 // CHECK1-NEXT: br label [[CLEANUP_I]] // CHECK1: cleanup.i: @@ -784,7 +784,7 @@ // CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i8* [[TMP15]]) #[[ATTR4]] // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__7_EXIT:%.*]] // CHECK1: .untied.jmp.1.i: -// CHECK1-NEXT: store i32 1, i32* @a, align 4 +// CHECK1-NEXT: store i32 1, i32* @a, align 4, !noalias !52 // CHECK1-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !52 // CHECK1-NEXT: br label [[CLEANUP_I]] // CHECK1: cleanup.i: @@ -826,7 +826,7 @@ // CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !62 // CHECK1-NEXT: store %struct.anon.8* [[TMP8]], %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !62 // CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !62 -// CHECK1-NEXT: store i32 2, i32* @a, align 4 +// CHECK1-NEXT: store i32 2, i32* @a, align 4, !noalias !62 // CHECK1-NEXT: ret i32 0 // // @@ -862,7 +862,7 @@ // CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !72 // CHECK1-NEXT: store %struct.anon.10* [[TMP8]], %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !72 // CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !72 -// CHECK1-NEXT: store i32 2, i32* @a, align 4 +// CHECK1-NEXT: store i32 2, i32* @a, align 4, !noalias !72 // CHECK1-NEXT: ret i32 0 // // @@ -898,7 +898,7 @@ // CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !82 // CHECK1-NEXT: store %struct.anon.12* [[TMP8]], %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !82 // CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !82 -// CHECK1-NEXT: store i32 3, i32* @a, align 4 +// CHECK1-NEXT: store i32 3, i32* @a, align 4, !noalias !82 // CHECK1-NEXT: ret i32 0 // // @@ -934,7 +934,7 @@ // CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !92 // CHECK1-NEXT: store %struct.anon.14* [[TMP8]], %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !92 // CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !92 -// CHECK1-NEXT: store i32 4, i32* @a, align 4 +// CHECK1-NEXT: store i32 4, i32* @a, align 4, !noalias !92 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP10]], i32 0, i32 0 // CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP11]], align 8 // CHECK1-NEXT: store i32 5, i32* [[TMP12]], align 128 @@ -996,7 +996,7 @@ // CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4]] // CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !102 // CHECK1-NEXT: store i32 4, i32* [[TMP16]], align 128 -// CHECK1-NEXT: store i32 4, i32* @a, align 4 +// CHECK1-NEXT: store i32 4, i32* @a, align 4, !noalias !102 // CHECK1-NEXT: ret i32 0 // // @@ -1291,87 +1291,92 @@ // CHECK1-SAME: () #[[ATTR8]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A:%.*]] = alloca float, align 4 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*)* @.omp_outlined..24 to void (i32*, i32*, ...)*), float* [[A]]) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_23:%.*]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store float* [[A]], float** [[TMP0]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.23*)* @.omp_outlined..24 to void (i32*, i32*, ...)*), %struct.anon.23* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..24 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR9:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.23* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR9:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.23*, align 8 // CHECK1-NEXT: [[B:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_23:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load float*, float** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store %struct.anon.23* [[__CONTEXT]], %struct.anon.23** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.23*, %struct.anon.23** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_23:%.*]], %struct.anon.23* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load float*, float** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store float* [[TMP0]], float** [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i64 48, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.24*)* @.omp_task_entry..27 to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to %struct.kmp_task_t_with_privates.24* -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_24:%.*]], %struct.kmp_task_t_with_privates.24* [[TMP7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.23* [[AGG_CAPTURED]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 [[TMP11]], i64 8, i1 false) -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_24]], %struct.kmp_task_t_with_privates.24* [[TMP7]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_25:%.*]], %struct..kmp_privates.t.25* [[TMP12]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP14:%.*]] = load double, double* [[B]], align 8 -// CHECK1-NEXT: store double [[TMP14]], double* [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* [[TMP6]]) -// CHECK1-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store float* [[TMP2]], float** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 1, i64 48, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.25*)* @.omp_task_entry..27 to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to %struct.kmp_task_t_with_privates.25* +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_25:%.*]], %struct.kmp_task_t_with_privates.25* [[TMP9]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP12:%.*]] = load i8*, i8** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.24* [[AGG_CAPTURED]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP12]], i8* align 8 [[TMP13]], i64 8, i1 false) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_25]], %struct.kmp_task_t_with_privates.25* [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_26:%.*]], %struct..kmp_privates.t.26* [[TMP14]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP16:%.*]] = load double, double* [[B]], align 8 +// CHECK1-NEXT: store double [[TMP16]], double* [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i8* [[TMP8]]) +// CHECK1-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_privates_map..26 -// CHECK1-SAME: (%struct..kmp_privates.t.25* noalias noundef [[TMP0:%.*]], double** noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK1-SAME: (%struct..kmp_privates.t.26* noalias noundef [[TMP0:%.*]], double** noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.25*, align 8 +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.26*, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca double**, align 8 -// CHECK1-NEXT: store %struct..kmp_privates.t.25* [[TMP0]], %struct..kmp_privates.t.25** [[DOTADDR]], align 8 +// CHECK1-NEXT: store %struct..kmp_privates.t.26* [[TMP0]], %struct..kmp_privates.t.26** [[DOTADDR]], align 8 // CHECK1-NEXT: store double** [[TMP1]], double*** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct..kmp_privates.t.25*, %struct..kmp_privates.t.25** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_25:%.*]], %struct..kmp_privates.t.25* [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct..kmp_privates.t.26*, %struct..kmp_privates.t.26** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_26:%.*]], %struct..kmp_privates.t.26* [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = load double**, double*** [[DOTADDR1]], align 8 // CHECK1-NEXT: store double* [[TMP3]], double** [[TMP4]], align 8 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..27 -// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.24* noalias noundef [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.25* noalias noundef [[TMP1:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.23*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.24*, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.24*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.25*, align 8 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.24* [[TMP1]], %struct.kmp_task_t_with_privates.24** [[DOTADDR1]], align 8 +// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.25* [[TMP1]], %struct.kmp_task_t_with_privates.25** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.24*, %struct.kmp_task_t_with_privates.24** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_24:%.*]], %struct.kmp_task_t_with_privates.24* [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.25*, %struct.kmp_task_t_with_privates.25** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_25:%.*]], %struct.kmp_task_t_with_privates.25* [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.23* -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_24]], %struct.kmp_task_t_with_privates.24* [[TMP3]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.25* [[TMP9]] to i8* -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.24* [[TMP3]] to i8* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.24* +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_25]], %struct.kmp_task_t_with_privates.25* [[TMP3]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.26* [[TMP9]] to i8* +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.25* [[TMP3]] to i8* // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META123:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META126:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META128:![0-9]+]]) @@ -1379,17 +1384,17 @@ // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !132 // CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !132 // CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !132 -// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.25*, double**)* @.omp_task_privates_map..26 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !132 +// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.26*, double**)* @.omp_task_privates_map..26 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !132 // CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !132 -// CHECK1-NEXT: store %struct.anon.23* [[TMP8]], %struct.anon.23** [[__CONTEXT_ADDR_I]], align 8, !noalias !132 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon.23*, %struct.anon.23** [[__CONTEXT_ADDR_I]], align 8, !noalias !132 +// CHECK1-NEXT: store %struct.anon.24* [[TMP8]], %struct.anon.24** [[__CONTEXT_ADDR_I]], align 8, !noalias !132 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon.24*, %struct.anon.24** [[__CONTEXT_ADDR_I]], align 8, !noalias !132 // CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !132 // CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !132 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, double**)* // CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], double** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4]] // CHECK1-NEXT: [[TMP16:%.*]] = load double*, double** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !132 // CHECK1-NEXT: [[TMP17:%.*]] = load double, double* [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_23:%.*]], %struct.anon.23* [[TMP12]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_24:%.*]], %struct.anon.24* [[TMP12]], i32 0, i32 0 // CHECK1-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP18]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load float, float* [[TMP19]], align 4 // CHECK1-NEXT: [[CONV_I:%.*]] = fpext float [[TMP20]] to double @@ -1748,8 +1753,8 @@ // CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 // CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 // CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK2-NEXT: store i32 15, i32* @a, align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* @a, align 4 +// CHECK2-NEXT: store i32 15, i32* @a, align 4, !noalias !12 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* @a, align 4, !noalias !12 // CHECK2-NEXT: [[CONV_I:%.*]] = trunc i32 [[TMP11]] to i8 // CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP10]], i32 0, i32 0 // CHECK2-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 8 @@ -1794,7 +1799,7 @@ // CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !22 // CHECK2-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !22 // CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !22 -// CHECK2-NEXT: store i32 15, i32* @a, align 4 +// CHECK2-NEXT: store i32 15, i32* @a, align 4, !noalias !22 // CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP10]], i32 0, i32 0 // CHECK2-NEXT: [[TMP12:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP11]], align 8 // CHECK2-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP12]], i64 0, i64 1 @@ -1855,7 +1860,7 @@ // CHECK2: .untied.jmp.1.i: // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !32 // CHECK2-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR4]] -// CHECK2-NEXT: store i32 1, i32* @a, align 4 +// CHECK2-NEXT: store i32 1, i32* @a, align 4, !noalias !32 // CHECK2-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR4]] // CHECK2-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !32 // CHECK2-NEXT: br label [[CLEANUP_I]] @@ -1916,7 +1921,7 @@ // CHECK2-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i8* [[TMP15]]) #[[ATTR4]] // CHECK2-NEXT: br label [[DOTOMP_OUTLINED__5_EXIT:%.*]] // CHECK2: .untied.jmp.1.i: -// CHECK2-NEXT: store i32 1, i32* @a, align 4 +// CHECK2-NEXT: store i32 1, i32* @a, align 4, !noalias !42 // CHECK2-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !42 // CHECK2-NEXT: br label [[CLEANUP_I]] // CHECK2: cleanup.i: @@ -1976,7 +1981,7 @@ // CHECK2-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i8* [[TMP15]]) #[[ATTR4]] // CHECK2-NEXT: br label [[DOTOMP_OUTLINED__7_EXIT:%.*]] // CHECK2: .untied.jmp.1.i: -// CHECK2-NEXT: store i32 1, i32* @a, align 4 +// CHECK2-NEXT: store i32 1, i32* @a, align 4, !noalias !52 // CHECK2-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !52 // CHECK2-NEXT: br label [[CLEANUP_I]] // CHECK2: cleanup.i: @@ -2018,7 +2023,7 @@ // CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !62 // CHECK2-NEXT: store %struct.anon.8* [[TMP8]], %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !62 // CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !62 -// CHECK2-NEXT: store i32 2, i32* @a, align 4 +// CHECK2-NEXT: store i32 2, i32* @a, align 4, !noalias !62 // CHECK2-NEXT: ret i32 0 // // @@ -2054,7 +2059,7 @@ // CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !72 // CHECK2-NEXT: store %struct.anon.10* [[TMP8]], %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !72 // CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !72 -// CHECK2-NEXT: store i32 2, i32* @a, align 4 +// CHECK2-NEXT: store i32 2, i32* @a, align 4, !noalias !72 // CHECK2-NEXT: ret i32 0 // // @@ -2090,7 +2095,7 @@ // CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !82 // CHECK2-NEXT: store %struct.anon.12* [[TMP8]], %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !82 // CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !82 -// CHECK2-NEXT: store i32 3, i32* @a, align 4 +// CHECK2-NEXT: store i32 3, i32* @a, align 4, !noalias !82 // CHECK2-NEXT: ret i32 0 // // @@ -2126,7 +2131,7 @@ // CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !92 // CHECK2-NEXT: store %struct.anon.14* [[TMP8]], %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !92 // CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !92 -// CHECK2-NEXT: store i32 4, i32* @a, align 4 +// CHECK2-NEXT: store i32 4, i32* @a, align 4, !noalias !92 // CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP10]], i32 0, i32 0 // CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP11]], align 8 // CHECK2-NEXT: store i32 5, i32* [[TMP12]], align 128 @@ -2188,7 +2193,7 @@ // CHECK2-NEXT: call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4]] // CHECK2-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !102 // CHECK2-NEXT: store i32 4, i32* [[TMP16]], align 128 -// CHECK2-NEXT: store i32 4, i32* @a, align 4 +// CHECK2-NEXT: store i32 4, i32* @a, align 4, !noalias !102 // CHECK2-NEXT: ret i32 0 // // @@ -2483,87 +2488,92 @@ // CHECK2-SAME: () #[[ATTR8]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[A:%.*]] = alloca float, align 4 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*)* @.omp_outlined..24 to void (i32*, i32*, ...)*), float* [[A]]) +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_23:%.*]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store float* [[A]], float** [[TMP0]], align 8 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.23*)* @.omp_outlined..24 to void (i32*, i32*, ...)*), %struct.anon.23* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..24 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR9:[0-9]+]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.23* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR9:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.23*, align 8 // CHECK2-NEXT: [[B:%.*]] = alloca double, align 8 -// CHECK2-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_23:%.*]], align 8 +// CHECK2-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load float*, float** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK2-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK2-NEXT: store %struct.anon.23* [[__CONTEXT]], %struct.anon.23** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.23*, %struct.anon.23** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_23:%.*]], %struct.anon.23* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load float*, float** [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK2-NEXT: store float* [[TMP0]], float** [[TMP5]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i64 48, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.24*)* @.omp_task_entry..27 to i32 (i32, i8*)*)) -// CHECK2-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to %struct.kmp_task_t_with_privates.24* -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_24:%.*]], %struct.kmp_task_t_with_privates.24* [[TMP7]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.23* [[AGG_CAPTURED]] to i8* -// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 [[TMP11]], i64 8, i1 false) -// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_24]], %struct.kmp_task_t_with_privates.24* [[TMP7]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_25:%.*]], %struct..kmp_privates.t.25* [[TMP12]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP14:%.*]] = load double, double* [[B]], align 8 -// CHECK2-NEXT: store double [[TMP14]], double* [[TMP13]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* [[TMP6]]) -// CHECK2-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK2-NEXT: store float* [[TMP2]], float** [[TMP7]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 1, i64 48, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.25*)* @.omp_task_entry..27 to i32 (i32, i8*)*)) +// CHECK2-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to %struct.kmp_task_t_with_privates.25* +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_25:%.*]], %struct.kmp_task_t_with_privates.25* [[TMP9]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP12:%.*]] = load i8*, i8** [[TMP11]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.24* [[AGG_CAPTURED]] to i8* +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP12]], i8* align 8 [[TMP13]], i64 8, i1 false) +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_25]], %struct.kmp_task_t_with_privates.25* [[TMP9]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_26:%.*]], %struct..kmp_privates.t.26* [[TMP14]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP16:%.*]] = load double, double* [[B]], align 8 +// CHECK2-NEXT: store double [[TMP16]], double* [[TMP15]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i8* [[TMP8]]) +// CHECK2-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: -// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_task_privates_map..26 -// CHECK2-SAME: (%struct..kmp_privates.t.25* noalias noundef [[TMP0:%.*]], double** noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK2-SAME: (%struct..kmp_privates.t.26* noalias noundef [[TMP0:%.*]], double** noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { // CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.25*, align 8 +// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.26*, align 8 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca double**, align 8 -// CHECK2-NEXT: store %struct..kmp_privates.t.25* [[TMP0]], %struct..kmp_privates.t.25** [[DOTADDR]], align 8 +// CHECK2-NEXT: store %struct..kmp_privates.t.26* [[TMP0]], %struct..kmp_privates.t.26** [[DOTADDR]], align 8 // CHECK2-NEXT: store double** [[TMP1]], double*** [[DOTADDR1]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load %struct..kmp_privates.t.25*, %struct..kmp_privates.t.25** [[DOTADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_25:%.*]], %struct..kmp_privates.t.25* [[TMP2]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load %struct..kmp_privates.t.26*, %struct..kmp_privates.t.26** [[DOTADDR]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_26:%.*]], %struct..kmp_privates.t.26* [[TMP2]], i32 0, i32 0 // CHECK2-NEXT: [[TMP4:%.*]] = load double**, double*** [[DOTADDR1]], align 8 // CHECK2-NEXT: store double* [[TMP3]], double** [[TMP4]], align 8 // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_task_entry..27 -// CHECK2-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.24* noalias noundef [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK2-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.25* noalias noundef [[TMP1:%.*]]) #[[ATTR3]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK2-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK2-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.23*, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.24*, align 8 // CHECK2-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca double*, align 8 // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.24*, align 8 +// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.25*, align 8 // CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK2-NEXT: store %struct.kmp_task_t_with_privates.24* [[TMP1]], %struct.kmp_task_t_with_privates.24** [[DOTADDR1]], align 8 +// CHECK2-NEXT: store %struct.kmp_task_t_with_privates.25* [[TMP1]], %struct.kmp_task_t_with_privates.25** [[DOTADDR1]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.24*, %struct.kmp_task_t_with_privates.24** [[DOTADDR1]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_24:%.*]], %struct.kmp_task_t_with_privates.24* [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.25*, %struct.kmp_task_t_with_privates.25** [[DOTADDR1]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_25:%.*]], %struct.kmp_task_t_with_privates.25* [[TMP3]], i32 0, i32 0 // CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.23* -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_24]], %struct.kmp_task_t_with_privates.24* [[TMP3]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.25* [[TMP9]] to i8* -// CHECK2-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.24* [[TMP3]] to i8* +// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.24* +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_25]], %struct.kmp_task_t_with_privates.25* [[TMP3]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.26* [[TMP9]] to i8* +// CHECK2-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.25* [[TMP3]] to i8* // CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META123:![0-9]+]]) // CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META126:![0-9]+]]) // CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META128:![0-9]+]]) @@ -2571,17 +2581,17 @@ // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !132 // CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !132 // CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !132 -// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.25*, double**)* @.omp_task_privates_map..26 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !132 +// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.26*, double**)* @.omp_task_privates_map..26 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !132 // CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !132 -// CHECK2-NEXT: store %struct.anon.23* [[TMP8]], %struct.anon.23** [[__CONTEXT_ADDR_I]], align 8, !noalias !132 -// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon.23*, %struct.anon.23** [[__CONTEXT_ADDR_I]], align 8, !noalias !132 +// CHECK2-NEXT: store %struct.anon.24* [[TMP8]], %struct.anon.24** [[__CONTEXT_ADDR_I]], align 8, !noalias !132 +// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon.24*, %struct.anon.24** [[__CONTEXT_ADDR_I]], align 8, !noalias !132 // CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !132 // CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !132 // CHECK2-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, double**)* // CHECK2-NEXT: call void [[TMP15]](i8* [[TMP14]], double** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4]] // CHECK2-NEXT: [[TMP16:%.*]] = load double*, double** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !132 // CHECK2-NEXT: [[TMP17:%.*]] = load double, double* [[TMP16]], align 8 -// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_23:%.*]], %struct.anon.23* [[TMP12]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_24:%.*]], %struct.anon.24* [[TMP12]], i32 0, i32 0 // CHECK2-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP18]], align 8 // CHECK2-NEXT: [[TMP20:%.*]] = load float, float* [[TMP19]], align 4 // CHECK2-NEXT: [[CONV_I:%.*]] = fpext float [[TMP20]] to double @@ -2990,8 +3000,8 @@ // CHECK2-51-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 // CHECK2-51-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 // CHECK2-51-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK2-51-NEXT: store i32 15, i32* @a, align 4 -// CHECK2-51-NEXT: [[TMP11:%.*]] = load i32, i32* @a, align 4 +// CHECK2-51-NEXT: store i32 15, i32* @a, align 4, !noalias !12 +// CHECK2-51-NEXT: [[TMP11:%.*]] = load i32, i32* @a, align 4, !noalias !12 // CHECK2-51-NEXT: [[CONV_I:%.*]] = trunc i32 [[TMP11]] to i8 // CHECK2-51-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP10]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 8 @@ -3036,7 +3046,7 @@ // CHECK2-51-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !22 // CHECK2-51-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !22 // CHECK2-51-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !22 -// CHECK2-51-NEXT: store i32 15, i32* @a, align 4 +// CHECK2-51-NEXT: store i32 15, i32* @a, align 4, !noalias !22 // CHECK2-51-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP10]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP12:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP11]], align 8 // CHECK2-51-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP12]], i64 0, i64 1 @@ -3097,7 +3107,7 @@ // CHECK2-51: .untied.jmp.1.i: // CHECK2-51-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !32 // CHECK2-51-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR4]] -// CHECK2-51-NEXT: store i32 1, i32* @a, align 4 +// CHECK2-51-NEXT: store i32 1, i32* @a, align 4, !noalias !32 // CHECK2-51-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR4]] // CHECK2-51-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !32 // CHECK2-51-NEXT: br label [[CLEANUP_I]] @@ -3158,7 +3168,7 @@ // CHECK2-51-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i8* [[TMP15]]) #[[ATTR4]] // CHECK2-51-NEXT: br label [[DOTOMP_OUTLINED__5_EXIT:%.*]] // CHECK2-51: .untied.jmp.1.i: -// CHECK2-51-NEXT: store i32 1, i32* @a, align 4 +// CHECK2-51-NEXT: store i32 1, i32* @a, align 4, !noalias !42 // CHECK2-51-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !42 // CHECK2-51-NEXT: br label [[CLEANUP_I]] // CHECK2-51: cleanup.i: @@ -3218,7 +3228,7 @@ // CHECK2-51-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i8* [[TMP15]]) #[[ATTR4]] // CHECK2-51-NEXT: br label [[DOTOMP_OUTLINED__7_EXIT:%.*]] // CHECK2-51: .untied.jmp.1.i: -// CHECK2-51-NEXT: store i32 1, i32* @a, align 4 +// CHECK2-51-NEXT: store i32 1, i32* @a, align 4, !noalias !52 // CHECK2-51-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !52 // CHECK2-51-NEXT: br label [[CLEANUP_I]] // CHECK2-51: cleanup.i: @@ -3260,7 +3270,7 @@ // CHECK2-51-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !62 // CHECK2-51-NEXT: store %struct.anon.8* [[TMP8]], %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !62 // CHECK2-51-NEXT: [[TMP10:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !62 -// CHECK2-51-NEXT: store i32 2, i32* @a, align 4 +// CHECK2-51-NEXT: store i32 2, i32* @a, align 4, !noalias !62 // CHECK2-51-NEXT: ret i32 0 // // @@ -3296,7 +3306,7 @@ // CHECK2-51-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !72 // CHECK2-51-NEXT: store %struct.anon.10* [[TMP8]], %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !72 // CHECK2-51-NEXT: [[TMP10:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !72 -// CHECK2-51-NEXT: store i32 2, i32* @a, align 4 +// CHECK2-51-NEXT: store i32 2, i32* @a, align 4, !noalias !72 // CHECK2-51-NEXT: ret i32 0 // // @@ -3332,7 +3342,7 @@ // CHECK2-51-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !82 // CHECK2-51-NEXT: store %struct.anon.12* [[TMP8]], %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !82 // CHECK2-51-NEXT: [[TMP10:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !82 -// CHECK2-51-NEXT: store i32 2, i32* @a, align 4 +// CHECK2-51-NEXT: store i32 2, i32* @a, align 4, !noalias !82 // CHECK2-51-NEXT: ret i32 0 // // @@ -3368,7 +3378,7 @@ // CHECK2-51-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !92 // CHECK2-51-NEXT: store %struct.anon.14* [[TMP8]], %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !92 // CHECK2-51-NEXT: [[TMP10:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !92 -// CHECK2-51-NEXT: store i32 3, i32* @a, align 4 +// CHECK2-51-NEXT: store i32 3, i32* @a, align 4, !noalias !92 // CHECK2-51-NEXT: ret i32 0 // // @@ -3404,7 +3414,7 @@ // CHECK2-51-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !102 // CHECK2-51-NEXT: store %struct.anon.16* [[TMP8]], %struct.anon.16** [[__CONTEXT_ADDR_I]], align 8, !noalias !102 // CHECK2-51-NEXT: [[TMP10:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR_I]], align 8, !noalias !102 -// CHECK2-51-NEXT: store i32 4, i32* @a, align 4 +// CHECK2-51-NEXT: store i32 4, i32* @a, align 4, !noalias !102 // CHECK2-51-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], %struct.anon.16* [[TMP10]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP11]], align 8 // CHECK2-51-NEXT: store i32 5, i32* [[TMP12]], align 128 @@ -3466,7 +3476,7 @@ // CHECK2-51-NEXT: call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4]] // CHECK2-51-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !112 // CHECK2-51-NEXT: store i32 4, i32* [[TMP16]], align 128 -// CHECK2-51-NEXT: store i32 4, i32* @a, align 4 +// CHECK2-51-NEXT: store i32 4, i32* @a, align 4, !noalias !112 // CHECK2-51-NEXT: ret i32 0 // // @@ -3761,87 +3771,92 @@ // CHECK2-51-SAME: () #[[ATTR8]] { // CHECK2-51-NEXT: entry: // CHECK2-51-NEXT: [[A:%.*]] = alloca float, align 4 -// CHECK2-51-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), float* [[A]]) +// CHECK2-51-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_25:%.*]], align 8 +// CHECK2-51-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-51-NEXT: store float* [[A]], float** [[TMP0]], align 8 +// CHECK2-51-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.25*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), %struct.anon.25* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-51-NEXT: ret void // // // CHECK2-51-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK2-51-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR9:[0-9]+]] { +// CHECK2-51-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.25* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR9:[0-9]+]] { // CHECK2-51-NEXT: entry: // CHECK2-51-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-51-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-51-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// CHECK2-51-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.25*, align 8 // CHECK2-51-NEXT: [[B:%.*]] = alloca double, align 8 -// CHECK2-51-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_25:%.*]], align 8 +// CHECK2-51-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_26:%.*]], align 8 // CHECK2-51-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-51-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-51-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 -// CHECK2-51-NEXT: [[TMP0:%.*]] = load float*, float** [[A_ADDR]], align 8 -// CHECK2-51-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-51-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK2-51-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK2-51-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK2-51-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK2-51-NEXT: store %struct.anon.25* [[__CONTEXT]], %struct.anon.25** [[__CONTEXT_ADDR]], align 8 +// CHECK2-51-NEXT: [[TMP0:%.*]] = load %struct.anon.25*, %struct.anon.25** [[__CONTEXT_ADDR]], align 8 +// CHECK2-51-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_25:%.*]], %struct.anon.25* [[TMP0]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP2:%.*]] = load float*, float** [[TMP1]], align 8 +// CHECK2-51-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-51-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK2-51-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-51-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK2-51-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2-51: omp_if.then: -// CHECK2-51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK2-51-NEXT: store float* [[TMP0]], float** [[TMP5]], align 8 -// CHECK2-51-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i64 48, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.26*)* @.omp_task_entry..29 to i32 (i32, i8*)*)) -// CHECK2-51-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to %struct.kmp_task_t_with_privates.26* -// CHECK2-51-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_26:%.*]], %struct.kmp_task_t_with_privates.26* [[TMP7]], i32 0, i32 0 -// CHECK2-51-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 0 -// CHECK2-51-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 -// CHECK2-51-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.25* [[AGG_CAPTURED]] to i8* -// CHECK2-51-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 [[TMP11]], i64 8, i1 false) -// CHECK2-51-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_26]], %struct.kmp_task_t_with_privates.26* [[TMP7]], i32 0, i32 1 -// CHECK2-51-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_27:%.*]], %struct..kmp_privates.t.27* [[TMP12]], i32 0, i32 0 -// CHECK2-51-NEXT: [[TMP14:%.*]] = load double, double* [[B]], align 8 -// CHECK2-51-NEXT: store double [[TMP14]], double* [[TMP13]], align 8 -// CHECK2-51-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* [[TMP6]]) -// CHECK2-51-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK2-51-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK2-51-NEXT: store float* [[TMP2]], float** [[TMP7]], align 8 +// CHECK2-51-NEXT: [[TMP8:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 1, i64 48, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.27*)* @.omp_task_entry..29 to i32 (i32, i8*)*)) +// CHECK2-51-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to %struct.kmp_task_t_with_privates.27* +// CHECK2-51-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_27:%.*]], %struct.kmp_task_t_with_privates.27* [[TMP9]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP10]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP12:%.*]] = load i8*, i8** [[TMP11]], align 8 +// CHECK2-51-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.26* [[AGG_CAPTURED]] to i8* +// CHECK2-51-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP12]], i8* align 8 [[TMP13]], i64 8, i1 false) +// CHECK2-51-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_27]], %struct.kmp_task_t_with_privates.27* [[TMP9]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_28:%.*]], %struct..kmp_privates.t.28* [[TMP14]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP16:%.*]] = load double, double* [[B]], align 8 +// CHECK2-51-NEXT: store double [[TMP16]], double* [[TMP15]], align 8 +// CHECK2-51-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i8* [[TMP8]]) +// CHECK2-51-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK2-51-NEXT: br label [[OMP_IF_END]] // CHECK2-51: omp_if.end: -// CHECK2-51-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK2-51-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]]) // CHECK2-51-NEXT: ret void // // // CHECK2-51-LABEL: define {{[^@]+}}@.omp_task_privates_map..28 -// CHECK2-51-SAME: (%struct..kmp_privates.t.27* noalias noundef [[TMP0:%.*]], double** noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK2-51-SAME: (%struct..kmp_privates.t.28* noalias noundef [[TMP0:%.*]], double** noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { // CHECK2-51-NEXT: entry: -// CHECK2-51-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.27*, align 8 +// CHECK2-51-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.28*, align 8 // CHECK2-51-NEXT: [[DOTADDR1:%.*]] = alloca double**, align 8 -// CHECK2-51-NEXT: store %struct..kmp_privates.t.27* [[TMP0]], %struct..kmp_privates.t.27** [[DOTADDR]], align 8 +// CHECK2-51-NEXT: store %struct..kmp_privates.t.28* [[TMP0]], %struct..kmp_privates.t.28** [[DOTADDR]], align 8 // CHECK2-51-NEXT: store double** [[TMP1]], double*** [[DOTADDR1]], align 8 -// CHECK2-51-NEXT: [[TMP2:%.*]] = load %struct..kmp_privates.t.27*, %struct..kmp_privates.t.27** [[DOTADDR]], align 8 -// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_27:%.*]], %struct..kmp_privates.t.27* [[TMP2]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP2:%.*]] = load %struct..kmp_privates.t.28*, %struct..kmp_privates.t.28** [[DOTADDR]], align 8 +// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_28:%.*]], %struct..kmp_privates.t.28* [[TMP2]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP4:%.*]] = load double**, double*** [[DOTADDR1]], align 8 // CHECK2-51-NEXT: store double* [[TMP3]], double** [[TMP4]], align 8 // CHECK2-51-NEXT: ret void // // // CHECK2-51-LABEL: define {{[^@]+}}@.omp_task_entry..29 -// CHECK2-51-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.26* noalias noundef [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK2-51-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.27* noalias noundef [[TMP1:%.*]]) #[[ATTR3]] { // CHECK2-51-NEXT: entry: // CHECK2-51-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK2-51-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 // CHECK2-51-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK2-51-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK2-51-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK2-51-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.25*, align 8 +// CHECK2-51-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.26*, align 8 // CHECK2-51-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca double*, align 8 // CHECK2-51-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK2-51-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.26*, align 8 +// CHECK2-51-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.27*, align 8 // CHECK2-51-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK2-51-NEXT: store %struct.kmp_task_t_with_privates.26* [[TMP1]], %struct.kmp_task_t_with_privates.26** [[DOTADDR1]], align 8 +// CHECK2-51-NEXT: store %struct.kmp_task_t_with_privates.27* [[TMP1]], %struct.kmp_task_t_with_privates.27** [[DOTADDR1]], align 8 // CHECK2-51-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK2-51-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.26*, %struct.kmp_task_t_with_privates.26** [[DOTADDR1]], align 8 -// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_26:%.*]], %struct.kmp_task_t_with_privates.26* [[TMP3]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.27*, %struct.kmp_task_t_with_privates.27** [[DOTADDR1]], align 8 +// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_27:%.*]], %struct.kmp_task_t_with_privates.27* [[TMP3]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK2-51-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK2-51-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.25* -// CHECK2-51-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_26]], %struct.kmp_task_t_with_privates.26* [[TMP3]], i32 0, i32 1 -// CHECK2-51-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.27* [[TMP9]] to i8* -// CHECK2-51-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.26* [[TMP3]] to i8* +// CHECK2-51-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.26* +// CHECK2-51-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_27]], %struct.kmp_task_t_with_privates.27* [[TMP3]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.28* [[TMP9]] to i8* +// CHECK2-51-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.27* [[TMP3]] to i8* // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META133:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META136:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META138:![0-9]+]]) @@ -3849,17 +3864,17 @@ // CHECK2-51-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !142 // CHECK2-51-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !142 // CHECK2-51-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !142 -// CHECK2-51-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.27*, double**)* @.omp_task_privates_map..28 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !142 +// CHECK2-51-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.28*, double**)* @.omp_task_privates_map..28 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !142 // CHECK2-51-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !142 -// CHECK2-51-NEXT: store %struct.anon.25* [[TMP8]], %struct.anon.25** [[__CONTEXT_ADDR_I]], align 8, !noalias !142 -// CHECK2-51-NEXT: [[TMP12:%.*]] = load %struct.anon.25*, %struct.anon.25** [[__CONTEXT_ADDR_I]], align 8, !noalias !142 +// CHECK2-51-NEXT: store %struct.anon.26* [[TMP8]], %struct.anon.26** [[__CONTEXT_ADDR_I]], align 8, !noalias !142 +// CHECK2-51-NEXT: [[TMP12:%.*]] = load %struct.anon.26*, %struct.anon.26** [[__CONTEXT_ADDR_I]], align 8, !noalias !142 // CHECK2-51-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !142 // CHECK2-51-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !142 // CHECK2-51-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, double**)* // CHECK2-51-NEXT: call void [[TMP15]](i8* [[TMP14]], double** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4]] // CHECK2-51-NEXT: [[TMP16:%.*]] = load double*, double** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !142 // CHECK2-51-NEXT: [[TMP17:%.*]] = load double, double* [[TMP16]], align 8 -// CHECK2-51-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_25:%.*]], %struct.anon.25* [[TMP12]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_26:%.*]], %struct.anon.26* [[TMP12]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP18]], align 8 // CHECK2-51-NEXT: [[TMP20:%.*]] = load float, float* [[TMP19]], align 4 // CHECK2-51-NEXT: [[CONV_I:%.*]] = fpext float [[TMP20]] to double @@ -3877,27 +3892,27 @@ // CHECK2-51-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK2-51-NEXT: [[D:%.*]] = alloca i32, align 4 // CHECK2-51-NEXT: [[E:%.*]] = alloca i32, align 4 -// CHECK2-51-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_28:%.*]], align 1 +// CHECK2-51-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_29:%.*]], align 1 // CHECK2-51-NEXT: [[DOTDEP_ARR_ADDR:%.*]] = alloca [2 x %struct.kmp_depend_info], align 8 // CHECK2-51-NEXT: [[DEP_COUNTER_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-51-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_31:%.*]], align 1 +// CHECK2-51-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_32:%.*]], align 1 // CHECK2-51-NEXT: [[DOTDEP_ARR_ADDR2:%.*]] = alloca [2 x %struct.kmp_depend_info], align 8 // CHECK2-51-NEXT: [[DEP_COUNTER_ADDR3:%.*]] = alloca i64, align 8 -// CHECK2-51-NEXT: [[AGG_CAPTURED4:%.*]] = alloca [[STRUCT_ANON_34:%.*]], align 1 +// CHECK2-51-NEXT: [[AGG_CAPTURED4:%.*]] = alloca [[STRUCT_ANON_35:%.*]], align 1 // CHECK2-51-NEXT: [[DOTDEP_ARR_ADDR5:%.*]] = alloca [2 x %struct.kmp_depend_info], align 8 // CHECK2-51-NEXT: [[DEP_COUNTER_ADDR6:%.*]] = alloca i64, align 8 -// CHECK2-51-NEXT: [[AGG_CAPTURED7:%.*]] = alloca [[STRUCT_ANON_37:%.*]], align 1 +// CHECK2-51-NEXT: [[AGG_CAPTURED7:%.*]] = alloca [[STRUCT_ANON_38:%.*]], align 1 // CHECK2-51-NEXT: [[DOTDEP_ARR_ADDR8:%.*]] = alloca [2 x %struct.kmp_depend_info], align 8 // CHECK2-51-NEXT: [[DEP_COUNTER_ADDR9:%.*]] = alloca i64, align 8 -// CHECK2-51-NEXT: [[AGG_CAPTURED10:%.*]] = alloca [[STRUCT_ANON_40:%.*]], align 1 +// CHECK2-51-NEXT: [[AGG_CAPTURED10:%.*]] = alloca [[STRUCT_ANON_41:%.*]], align 1 // CHECK2-51-NEXT: [[DOTDEP_ARR_ADDR11:%.*]] = alloca [1 x %struct.kmp_depend_info], align 8 // CHECK2-51-NEXT: [[DEP_COUNTER_ADDR12:%.*]] = alloca i64, align 8 // CHECK2-51-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK2-51-NEXT: [[TMP1:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.29*)* @.omp_task_entry..32 to i32 (i32, i8*)*)) -// CHECK2-51-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.kmp_task_t_with_privates.29* -// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_29:%.*]], %struct.kmp_task_t_with_privates.29* [[TMP2]], i32 0, i32 0 -// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_29]], %struct.kmp_task_t_with_privates.29* [[TMP2]], i32 0, i32 1 -// CHECK2-51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_30:%.*]], %struct..kmp_privates.t.30* [[TMP4]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP1:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.30*)* @.omp_task_entry..32 to i32 (i32, i8*)*)) +// CHECK2-51-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.kmp_task_t_with_privates.30* +// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_30:%.*]], %struct.kmp_task_t_with_privates.30* [[TMP2]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_30]], %struct.kmp_task_t_with_privates.30* [[TMP2]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_31:%.*]], %struct..kmp_privates.t.31* [[TMP4]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 // CHECK2-51-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 8 // CHECK2-51-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR]], i64 0, i64 0 @@ -3919,11 +3934,11 @@ // CHECK2-51-NEXT: store i64 2, i64* [[DEP_COUNTER_ADDR]], align 8 // CHECK2-51-NEXT: [[TMP17:%.*]] = bitcast %struct.kmp_depend_info* [[TMP7]] to i8* // CHECK2-51-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_omp_task_with_deps(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP1]], i32 2, i8* [[TMP17]], i32 0, i8* null) -// CHECK2-51-NEXT: [[TMP19:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.32*)* @.omp_task_entry..35 to i32 (i32, i8*)*)) -// CHECK2-51-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to %struct.kmp_task_t_with_privates.32* -// CHECK2-51-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_32:%.*]], %struct.kmp_task_t_with_privates.32* [[TMP20]], i32 0, i32 0 -// CHECK2-51-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_32]], %struct.kmp_task_t_with_privates.32* [[TMP20]], i32 0, i32 1 -// CHECK2-51-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_33:%.*]], %struct..kmp_privates.t.33* [[TMP22]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP19:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.33*)* @.omp_task_entry..35 to i32 (i32, i8*)*)) +// CHECK2-51-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to %struct.kmp_task_t_with_privates.33* +// CHECK2-51-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_33:%.*]], %struct.kmp_task_t_with_privates.33* [[TMP20]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_33]], %struct.kmp_task_t_with_privates.33* [[TMP20]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_34:%.*]], %struct..kmp_privates.t.34* [[TMP22]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP24:%.*]] = load i32, i32* [[A]], align 4 // CHECK2-51-NEXT: store i32 [[TMP24]], i32* [[TMP23]], align 8 // CHECK2-51-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR2]], i64 0, i64 0 @@ -3945,11 +3960,11 @@ // CHECK2-51-NEXT: store i64 2, i64* [[DEP_COUNTER_ADDR3]], align 8 // CHECK2-51-NEXT: [[TMP35:%.*]] = bitcast %struct.kmp_depend_info* [[TMP25]] to i8* // CHECK2-51-NEXT: [[TMP36:%.*]] = call i32 @__kmpc_omp_task_with_deps(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP19]], i32 2, i8* [[TMP35]], i32 0, i8* null) -// CHECK2-51-NEXT: [[TMP37:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.35*)* @.omp_task_entry..38 to i32 (i32, i8*)*)) -// CHECK2-51-NEXT: [[TMP38:%.*]] = bitcast i8* [[TMP37]] to %struct.kmp_task_t_with_privates.35* -// CHECK2-51-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_35:%.*]], %struct.kmp_task_t_with_privates.35* [[TMP38]], i32 0, i32 0 -// CHECK2-51-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_35]], %struct.kmp_task_t_with_privates.35* [[TMP38]], i32 0, i32 1 -// CHECK2-51-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_36:%.*]], %struct..kmp_privates.t.36* [[TMP40]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP37:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.36*)* @.omp_task_entry..38 to i32 (i32, i8*)*)) +// CHECK2-51-NEXT: [[TMP38:%.*]] = bitcast i8* [[TMP37]] to %struct.kmp_task_t_with_privates.36* +// CHECK2-51-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_36:%.*]], %struct.kmp_task_t_with_privates.36* [[TMP38]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_36]], %struct.kmp_task_t_with_privates.36* [[TMP38]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_37:%.*]], %struct..kmp_privates.t.37* [[TMP40]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP42:%.*]] = load i32, i32* [[A]], align 4 // CHECK2-51-NEXT: store i32 [[TMP42]], i32* [[TMP41]], align 8 // CHECK2-51-NEXT: [[TMP43:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR5]], i64 0, i64 0 @@ -3971,11 +3986,11 @@ // CHECK2-51-NEXT: store i64 2, i64* [[DEP_COUNTER_ADDR6]], align 8 // CHECK2-51-NEXT: [[TMP53:%.*]] = bitcast %struct.kmp_depend_info* [[TMP43]] to i8* // CHECK2-51-NEXT: [[TMP54:%.*]] = call i32 @__kmpc_omp_task_with_deps(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP37]], i32 2, i8* [[TMP53]], i32 0, i8* null) -// CHECK2-51-NEXT: [[TMP55:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.38*)* @.omp_task_entry..41 to i32 (i32, i8*)*)) -// CHECK2-51-NEXT: [[TMP56:%.*]] = bitcast i8* [[TMP55]] to %struct.kmp_task_t_with_privates.38* -// CHECK2-51-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_38:%.*]], %struct.kmp_task_t_with_privates.38* [[TMP56]], i32 0, i32 0 -// CHECK2-51-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_38]], %struct.kmp_task_t_with_privates.38* [[TMP56]], i32 0, i32 1 -// CHECK2-51-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_39:%.*]], %struct..kmp_privates.t.39* [[TMP58]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP55:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.39*)* @.omp_task_entry..41 to i32 (i32, i8*)*)) +// CHECK2-51-NEXT: [[TMP56:%.*]] = bitcast i8* [[TMP55]] to %struct.kmp_task_t_with_privates.39* +// CHECK2-51-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_39:%.*]], %struct.kmp_task_t_with_privates.39* [[TMP56]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_39]], %struct.kmp_task_t_with_privates.39* [[TMP56]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_40:%.*]], %struct..kmp_privates.t.40* [[TMP58]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP60:%.*]] = load i32, i32* [[A]], align 4 // CHECK2-51-NEXT: store i32 [[TMP60]], i32* [[TMP59]], align 8 // CHECK2-51-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], [2 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR8]], i64 0, i64 0 @@ -3997,11 +4012,11 @@ // CHECK2-51-NEXT: store i64 2, i64* [[DEP_COUNTER_ADDR9]], align 8 // CHECK2-51-NEXT: [[TMP71:%.*]] = bitcast %struct.kmp_depend_info* [[TMP61]] to i8* // CHECK2-51-NEXT: [[TMP72:%.*]] = call i32 @__kmpc_omp_task_with_deps(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP55]], i32 2, i8* [[TMP71]], i32 0, i8* null) -// CHECK2-51-NEXT: [[TMP73:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.41*)* @.omp_task_entry..44 to i32 (i32, i8*)*)) -// CHECK2-51-NEXT: [[TMP74:%.*]] = bitcast i8* [[TMP73]] to %struct.kmp_task_t_with_privates.41* -// CHECK2-51-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_41:%.*]], %struct.kmp_task_t_with_privates.41* [[TMP74]], i32 0, i32 0 -// CHECK2-51-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_41]], %struct.kmp_task_t_with_privates.41* [[TMP74]], i32 0, i32 1 -// CHECK2-51-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_42:%.*]], %struct..kmp_privates.t.42* [[TMP76]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP73:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.42*)* @.omp_task_entry..44 to i32 (i32, i8*)*)) +// CHECK2-51-NEXT: [[TMP74:%.*]] = bitcast i8* [[TMP73]] to %struct.kmp_task_t_with_privates.42* +// CHECK2-51-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_42:%.*]], %struct.kmp_task_t_with_privates.42* [[TMP74]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_42]], %struct.kmp_task_t_with_privates.42* [[TMP74]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_43:%.*]], %struct..kmp_privates.t.43* [[TMP76]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4 // CHECK2-51-NEXT: store i32 [[TMP78]], i32* [[TMP77]], align 8 // CHECK2-51-NEXT: [[TMP79:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR11]], i64 0, i64 0 @@ -4019,43 +4034,43 @@ // // // CHECK2-51-LABEL: define {{[^@]+}}@.omp_task_privates_map..31 -// CHECK2-51-SAME: (%struct..kmp_privates.t.30* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK2-51-SAME: (%struct..kmp_privates.t.31* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { // CHECK2-51-NEXT: entry: -// CHECK2-51-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.30*, align 8 +// CHECK2-51-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.31*, align 8 // CHECK2-51-NEXT: [[DOTADDR1:%.*]] = alloca i32**, align 8 -// CHECK2-51-NEXT: store %struct..kmp_privates.t.30* [[TMP0]], %struct..kmp_privates.t.30** [[DOTADDR]], align 8 +// CHECK2-51-NEXT: store %struct..kmp_privates.t.31* [[TMP0]], %struct..kmp_privates.t.31** [[DOTADDR]], align 8 // CHECK2-51-NEXT: store i32** [[TMP1]], i32*** [[DOTADDR1]], align 8 -// CHECK2-51-NEXT: [[TMP2:%.*]] = load %struct..kmp_privates.t.30*, %struct..kmp_privates.t.30** [[DOTADDR]], align 8 -// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_30:%.*]], %struct..kmp_privates.t.30* [[TMP2]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP2:%.*]] = load %struct..kmp_privates.t.31*, %struct..kmp_privates.t.31** [[DOTADDR]], align 8 +// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_31:%.*]], %struct..kmp_privates.t.31* [[TMP2]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[DOTADDR1]], align 8 // CHECK2-51-NEXT: store i32* [[TMP3]], i32** [[TMP4]], align 8 // CHECK2-51-NEXT: ret void // // // CHECK2-51-LABEL: define {{[^@]+}}@.omp_task_entry..32 -// CHECK2-51-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.29* noalias noundef [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK2-51-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.30* noalias noundef [[TMP1:%.*]]) #[[ATTR3]] { // CHECK2-51-NEXT: entry: // CHECK2-51-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK2-51-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 // CHECK2-51-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK2-51-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK2-51-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK2-51-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.28*, align 8 +// CHECK2-51-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.29*, align 8 // CHECK2-51-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i32*, align 8 // CHECK2-51-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK2-51-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.29*, align 8 +// CHECK2-51-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.30*, align 8 // CHECK2-51-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK2-51-NEXT: store %struct.kmp_task_t_with_privates.29* [[TMP1]], %struct.kmp_task_t_with_privates.29** [[DOTADDR1]], align 8 +// CHECK2-51-NEXT: store %struct.kmp_task_t_with_privates.30* [[TMP1]], %struct.kmp_task_t_with_privates.30** [[DOTADDR1]], align 8 // CHECK2-51-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK2-51-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.29*, %struct.kmp_task_t_with_privates.29** [[DOTADDR1]], align 8 -// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_29:%.*]], %struct.kmp_task_t_with_privates.29* [[TMP3]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.30*, %struct.kmp_task_t_with_privates.30** [[DOTADDR1]], align 8 +// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_30:%.*]], %struct.kmp_task_t_with_privates.30* [[TMP3]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK2-51-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK2-51-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.28* -// CHECK2-51-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_29]], %struct.kmp_task_t_with_privates.29* [[TMP3]], i32 0, i32 1 -// CHECK2-51-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.30* [[TMP9]] to i8* -// CHECK2-51-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.29* [[TMP3]] to i8* +// CHECK2-51-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.29* +// CHECK2-51-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_30]], %struct.kmp_task_t_with_privates.30* [[TMP3]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.31* [[TMP9]] to i8* +// CHECK2-51-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.30* [[TMP3]] to i8* // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META145:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META148:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META150:![0-9]+]]) @@ -4063,10 +4078,10 @@ // CHECK2-51-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !154 // CHECK2-51-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !154 // CHECK2-51-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !154 -// CHECK2-51-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.30*, i32**)* @.omp_task_privates_map..31 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !154 +// CHECK2-51-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.31*, i32**)* @.omp_task_privates_map..31 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !154 // CHECK2-51-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !154 -// CHECK2-51-NEXT: store %struct.anon.28* [[TMP8]], %struct.anon.28** [[__CONTEXT_ADDR_I]], align 8, !noalias !154 -// CHECK2-51-NEXT: [[TMP12:%.*]] = load %struct.anon.28*, %struct.anon.28** [[__CONTEXT_ADDR_I]], align 8, !noalias !154 +// CHECK2-51-NEXT: store %struct.anon.29* [[TMP8]], %struct.anon.29** [[__CONTEXT_ADDR_I]], align 8, !noalias !154 +// CHECK2-51-NEXT: [[TMP12:%.*]] = load %struct.anon.29*, %struct.anon.29** [[__CONTEXT_ADDR_I]], align 8, !noalias !154 // CHECK2-51-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !154 // CHECK2-51-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !154 // CHECK2-51-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**)* @@ -4077,43 +4092,43 @@ // // // CHECK2-51-LABEL: define {{[^@]+}}@.omp_task_privates_map..34 -// CHECK2-51-SAME: (%struct..kmp_privates.t.33* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK2-51-SAME: (%struct..kmp_privates.t.34* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { // CHECK2-51-NEXT: entry: -// CHECK2-51-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.33*, align 8 +// CHECK2-51-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.34*, align 8 // CHECK2-51-NEXT: [[DOTADDR1:%.*]] = alloca i32**, align 8 -// CHECK2-51-NEXT: store %struct..kmp_privates.t.33* [[TMP0]], %struct..kmp_privates.t.33** [[DOTADDR]], align 8 +// CHECK2-51-NEXT: store %struct..kmp_privates.t.34* [[TMP0]], %struct..kmp_privates.t.34** [[DOTADDR]], align 8 // CHECK2-51-NEXT: store i32** [[TMP1]], i32*** [[DOTADDR1]], align 8 -// CHECK2-51-NEXT: [[TMP2:%.*]] = load %struct..kmp_privates.t.33*, %struct..kmp_privates.t.33** [[DOTADDR]], align 8 -// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_33:%.*]], %struct..kmp_privates.t.33* [[TMP2]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP2:%.*]] = load %struct..kmp_privates.t.34*, %struct..kmp_privates.t.34** [[DOTADDR]], align 8 +// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_34:%.*]], %struct..kmp_privates.t.34* [[TMP2]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[DOTADDR1]], align 8 // CHECK2-51-NEXT: store i32* [[TMP3]], i32** [[TMP4]], align 8 // CHECK2-51-NEXT: ret void // // // CHECK2-51-LABEL: define {{[^@]+}}@.omp_task_entry..35 -// CHECK2-51-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.32* noalias noundef [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK2-51-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.33* noalias noundef [[TMP1:%.*]]) #[[ATTR3]] { // CHECK2-51-NEXT: entry: // CHECK2-51-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK2-51-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 // CHECK2-51-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK2-51-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK2-51-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK2-51-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.31*, align 8 +// CHECK2-51-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.32*, align 8 // CHECK2-51-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i32*, align 8 // CHECK2-51-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK2-51-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.32*, align 8 +// CHECK2-51-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.33*, align 8 // CHECK2-51-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK2-51-NEXT: store %struct.kmp_task_t_with_privates.32* [[TMP1]], %struct.kmp_task_t_with_privates.32** [[DOTADDR1]], align 8 +// CHECK2-51-NEXT: store %struct.kmp_task_t_with_privates.33* [[TMP1]], %struct.kmp_task_t_with_privates.33** [[DOTADDR1]], align 8 // CHECK2-51-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK2-51-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.32*, %struct.kmp_task_t_with_privates.32** [[DOTADDR1]], align 8 -// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_32:%.*]], %struct.kmp_task_t_with_privates.32* [[TMP3]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.33*, %struct.kmp_task_t_with_privates.33** [[DOTADDR1]], align 8 +// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_33:%.*]], %struct.kmp_task_t_with_privates.33* [[TMP3]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK2-51-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK2-51-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.31* -// CHECK2-51-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_32]], %struct.kmp_task_t_with_privates.32* [[TMP3]], i32 0, i32 1 -// CHECK2-51-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.33* [[TMP9]] to i8* -// CHECK2-51-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.32* [[TMP3]] to i8* +// CHECK2-51-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.32* +// CHECK2-51-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_33]], %struct.kmp_task_t_with_privates.33* [[TMP3]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.34* [[TMP9]] to i8* +// CHECK2-51-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.33* [[TMP3]] to i8* // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META155:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META158:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META160:![0-9]+]]) @@ -4121,10 +4136,10 @@ // CHECK2-51-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !164 // CHECK2-51-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !164 // CHECK2-51-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !164 -// CHECK2-51-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.33*, i32**)* @.omp_task_privates_map..34 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !164 +// CHECK2-51-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.34*, i32**)* @.omp_task_privates_map..34 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !164 // CHECK2-51-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !164 -// CHECK2-51-NEXT: store %struct.anon.31* [[TMP8]], %struct.anon.31** [[__CONTEXT_ADDR_I]], align 8, !noalias !164 -// CHECK2-51-NEXT: [[TMP12:%.*]] = load %struct.anon.31*, %struct.anon.31** [[__CONTEXT_ADDR_I]], align 8, !noalias !164 +// CHECK2-51-NEXT: store %struct.anon.32* [[TMP8]], %struct.anon.32** [[__CONTEXT_ADDR_I]], align 8, !noalias !164 +// CHECK2-51-NEXT: [[TMP12:%.*]] = load %struct.anon.32*, %struct.anon.32** [[__CONTEXT_ADDR_I]], align 8, !noalias !164 // CHECK2-51-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !164 // CHECK2-51-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !164 // CHECK2-51-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**)* @@ -4135,43 +4150,43 @@ // // // CHECK2-51-LABEL: define {{[^@]+}}@.omp_task_privates_map..37 -// CHECK2-51-SAME: (%struct..kmp_privates.t.36* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK2-51-SAME: (%struct..kmp_privates.t.37* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { // CHECK2-51-NEXT: entry: -// CHECK2-51-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.36*, align 8 +// CHECK2-51-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.37*, align 8 // CHECK2-51-NEXT: [[DOTADDR1:%.*]] = alloca i32**, align 8 -// CHECK2-51-NEXT: store %struct..kmp_privates.t.36* [[TMP0]], %struct..kmp_privates.t.36** [[DOTADDR]], align 8 +// CHECK2-51-NEXT: store %struct..kmp_privates.t.37* [[TMP0]], %struct..kmp_privates.t.37** [[DOTADDR]], align 8 // CHECK2-51-NEXT: store i32** [[TMP1]], i32*** [[DOTADDR1]], align 8 -// CHECK2-51-NEXT: [[TMP2:%.*]] = load %struct..kmp_privates.t.36*, %struct..kmp_privates.t.36** [[DOTADDR]], align 8 -// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_36:%.*]], %struct..kmp_privates.t.36* [[TMP2]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP2:%.*]] = load %struct..kmp_privates.t.37*, %struct..kmp_privates.t.37** [[DOTADDR]], align 8 +// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_37:%.*]], %struct..kmp_privates.t.37* [[TMP2]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[DOTADDR1]], align 8 // CHECK2-51-NEXT: store i32* [[TMP3]], i32** [[TMP4]], align 8 // CHECK2-51-NEXT: ret void // // // CHECK2-51-LABEL: define {{[^@]+}}@.omp_task_entry..38 -// CHECK2-51-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.35* noalias noundef [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK2-51-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.36* noalias noundef [[TMP1:%.*]]) #[[ATTR3]] { // CHECK2-51-NEXT: entry: // CHECK2-51-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK2-51-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 // CHECK2-51-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK2-51-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK2-51-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK2-51-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.34*, align 8 +// CHECK2-51-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.35*, align 8 // CHECK2-51-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i32*, align 8 // CHECK2-51-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK2-51-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.35*, align 8 +// CHECK2-51-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.36*, align 8 // CHECK2-51-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK2-51-NEXT: store %struct.kmp_task_t_with_privates.35* [[TMP1]], %struct.kmp_task_t_with_privates.35** [[DOTADDR1]], align 8 +// CHECK2-51-NEXT: store %struct.kmp_task_t_with_privates.36* [[TMP1]], %struct.kmp_task_t_with_privates.36** [[DOTADDR1]], align 8 // CHECK2-51-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK2-51-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.35*, %struct.kmp_task_t_with_privates.35** [[DOTADDR1]], align 8 -// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_35:%.*]], %struct.kmp_task_t_with_privates.35* [[TMP3]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.36*, %struct.kmp_task_t_with_privates.36** [[DOTADDR1]], align 8 +// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_36:%.*]], %struct.kmp_task_t_with_privates.36* [[TMP3]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK2-51-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK2-51-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.34* -// CHECK2-51-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_35]], %struct.kmp_task_t_with_privates.35* [[TMP3]], i32 0, i32 1 -// CHECK2-51-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.36* [[TMP9]] to i8* -// CHECK2-51-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.35* [[TMP3]] to i8* +// CHECK2-51-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.35* +// CHECK2-51-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_36]], %struct.kmp_task_t_with_privates.36* [[TMP3]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.37* [[TMP9]] to i8* +// CHECK2-51-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.36* [[TMP3]] to i8* // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META165:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META168:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META170:![0-9]+]]) @@ -4179,10 +4194,10 @@ // CHECK2-51-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !174 // CHECK2-51-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !174 // CHECK2-51-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !174 -// CHECK2-51-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.36*, i32**)* @.omp_task_privates_map..37 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !174 +// CHECK2-51-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.37*, i32**)* @.omp_task_privates_map..37 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !174 // CHECK2-51-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !174 -// CHECK2-51-NEXT: store %struct.anon.34* [[TMP8]], %struct.anon.34** [[__CONTEXT_ADDR_I]], align 8, !noalias !174 -// CHECK2-51-NEXT: [[TMP12:%.*]] = load %struct.anon.34*, %struct.anon.34** [[__CONTEXT_ADDR_I]], align 8, !noalias !174 +// CHECK2-51-NEXT: store %struct.anon.35* [[TMP8]], %struct.anon.35** [[__CONTEXT_ADDR_I]], align 8, !noalias !174 +// CHECK2-51-NEXT: [[TMP12:%.*]] = load %struct.anon.35*, %struct.anon.35** [[__CONTEXT_ADDR_I]], align 8, !noalias !174 // CHECK2-51-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !174 // CHECK2-51-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !174 // CHECK2-51-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**)* @@ -4193,43 +4208,43 @@ // // // CHECK2-51-LABEL: define {{[^@]+}}@.omp_task_privates_map..40 -// CHECK2-51-SAME: (%struct..kmp_privates.t.39* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK2-51-SAME: (%struct..kmp_privates.t.40* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { // CHECK2-51-NEXT: entry: -// CHECK2-51-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.39*, align 8 +// CHECK2-51-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.40*, align 8 // CHECK2-51-NEXT: [[DOTADDR1:%.*]] = alloca i32**, align 8 -// CHECK2-51-NEXT: store %struct..kmp_privates.t.39* [[TMP0]], %struct..kmp_privates.t.39** [[DOTADDR]], align 8 +// CHECK2-51-NEXT: store %struct..kmp_privates.t.40* [[TMP0]], %struct..kmp_privates.t.40** [[DOTADDR]], align 8 // CHECK2-51-NEXT: store i32** [[TMP1]], i32*** [[DOTADDR1]], align 8 -// CHECK2-51-NEXT: [[TMP2:%.*]] = load %struct..kmp_privates.t.39*, %struct..kmp_privates.t.39** [[DOTADDR]], align 8 -// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_39:%.*]], %struct..kmp_privates.t.39* [[TMP2]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP2:%.*]] = load %struct..kmp_privates.t.40*, %struct..kmp_privates.t.40** [[DOTADDR]], align 8 +// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_40:%.*]], %struct..kmp_privates.t.40* [[TMP2]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[DOTADDR1]], align 8 // CHECK2-51-NEXT: store i32* [[TMP3]], i32** [[TMP4]], align 8 // CHECK2-51-NEXT: ret void // // // CHECK2-51-LABEL: define {{[^@]+}}@.omp_task_entry..41 -// CHECK2-51-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.38* noalias noundef [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK2-51-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.39* noalias noundef [[TMP1:%.*]]) #[[ATTR3]] { // CHECK2-51-NEXT: entry: // CHECK2-51-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK2-51-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 // CHECK2-51-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK2-51-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK2-51-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK2-51-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.37*, align 8 +// CHECK2-51-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.38*, align 8 // CHECK2-51-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i32*, align 8 // CHECK2-51-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK2-51-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.38*, align 8 +// CHECK2-51-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.39*, align 8 // CHECK2-51-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK2-51-NEXT: store %struct.kmp_task_t_with_privates.38* [[TMP1]], %struct.kmp_task_t_with_privates.38** [[DOTADDR1]], align 8 +// CHECK2-51-NEXT: store %struct.kmp_task_t_with_privates.39* [[TMP1]], %struct.kmp_task_t_with_privates.39** [[DOTADDR1]], align 8 // CHECK2-51-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK2-51-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.38*, %struct.kmp_task_t_with_privates.38** [[DOTADDR1]], align 8 -// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_38:%.*]], %struct.kmp_task_t_with_privates.38* [[TMP3]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.39*, %struct.kmp_task_t_with_privates.39** [[DOTADDR1]], align 8 +// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_39:%.*]], %struct.kmp_task_t_with_privates.39* [[TMP3]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK2-51-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK2-51-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.37* -// CHECK2-51-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_38]], %struct.kmp_task_t_with_privates.38* [[TMP3]], i32 0, i32 1 -// CHECK2-51-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.39* [[TMP9]] to i8* -// CHECK2-51-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.38* [[TMP3]] to i8* +// CHECK2-51-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.38* +// CHECK2-51-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_39]], %struct.kmp_task_t_with_privates.39* [[TMP3]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.40* [[TMP9]] to i8* +// CHECK2-51-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.39* [[TMP3]] to i8* // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META175:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META178:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META180:![0-9]+]]) @@ -4237,10 +4252,10 @@ // CHECK2-51-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !184 // CHECK2-51-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !184 // CHECK2-51-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !184 -// CHECK2-51-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.39*, i32**)* @.omp_task_privates_map..40 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !184 +// CHECK2-51-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.40*, i32**)* @.omp_task_privates_map..40 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !184 // CHECK2-51-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !184 -// CHECK2-51-NEXT: store %struct.anon.37* [[TMP8]], %struct.anon.37** [[__CONTEXT_ADDR_I]], align 8, !noalias !184 -// CHECK2-51-NEXT: [[TMP12:%.*]] = load %struct.anon.37*, %struct.anon.37** [[__CONTEXT_ADDR_I]], align 8, !noalias !184 +// CHECK2-51-NEXT: store %struct.anon.38* [[TMP8]], %struct.anon.38** [[__CONTEXT_ADDR_I]], align 8, !noalias !184 +// CHECK2-51-NEXT: [[TMP12:%.*]] = load %struct.anon.38*, %struct.anon.38** [[__CONTEXT_ADDR_I]], align 8, !noalias !184 // CHECK2-51-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !184 // CHECK2-51-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !184 // CHECK2-51-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**)* @@ -4251,43 +4266,43 @@ // // // CHECK2-51-LABEL: define {{[^@]+}}@.omp_task_privates_map..43 -// CHECK2-51-SAME: (%struct..kmp_privates.t.42* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK2-51-SAME: (%struct..kmp_privates.t.43* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { // CHECK2-51-NEXT: entry: -// CHECK2-51-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.42*, align 8 +// CHECK2-51-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.43*, align 8 // CHECK2-51-NEXT: [[DOTADDR1:%.*]] = alloca i32**, align 8 -// CHECK2-51-NEXT: store %struct..kmp_privates.t.42* [[TMP0]], %struct..kmp_privates.t.42** [[DOTADDR]], align 8 +// CHECK2-51-NEXT: store %struct..kmp_privates.t.43* [[TMP0]], %struct..kmp_privates.t.43** [[DOTADDR]], align 8 // CHECK2-51-NEXT: store i32** [[TMP1]], i32*** [[DOTADDR1]], align 8 -// CHECK2-51-NEXT: [[TMP2:%.*]] = load %struct..kmp_privates.t.42*, %struct..kmp_privates.t.42** [[DOTADDR]], align 8 -// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_42:%.*]], %struct..kmp_privates.t.42* [[TMP2]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP2:%.*]] = load %struct..kmp_privates.t.43*, %struct..kmp_privates.t.43** [[DOTADDR]], align 8 +// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_43:%.*]], %struct..kmp_privates.t.43* [[TMP2]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[DOTADDR1]], align 8 // CHECK2-51-NEXT: store i32* [[TMP3]], i32** [[TMP4]], align 8 // CHECK2-51-NEXT: ret void // // // CHECK2-51-LABEL: define {{[^@]+}}@.omp_task_entry..44 -// CHECK2-51-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.41* noalias noundef [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK2-51-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.42* noalias noundef [[TMP1:%.*]]) #[[ATTR3]] { // CHECK2-51-NEXT: entry: // CHECK2-51-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK2-51-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 // CHECK2-51-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK2-51-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK2-51-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK2-51-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.40*, align 8 +// CHECK2-51-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.41*, align 8 // CHECK2-51-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i32*, align 8 // CHECK2-51-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK2-51-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.41*, align 8 +// CHECK2-51-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.42*, align 8 // CHECK2-51-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK2-51-NEXT: store %struct.kmp_task_t_with_privates.41* [[TMP1]], %struct.kmp_task_t_with_privates.41** [[DOTADDR1]], align 8 +// CHECK2-51-NEXT: store %struct.kmp_task_t_with_privates.42* [[TMP1]], %struct.kmp_task_t_with_privates.42** [[DOTADDR1]], align 8 // CHECK2-51-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK2-51-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.41*, %struct.kmp_task_t_with_privates.41** [[DOTADDR1]], align 8 -// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_41:%.*]], %struct.kmp_task_t_with_privates.41* [[TMP3]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.42*, %struct.kmp_task_t_with_privates.42** [[DOTADDR1]], align 8 +// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_42:%.*]], %struct.kmp_task_t_with_privates.42* [[TMP3]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK2-51-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK2-51-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.40* -// CHECK2-51-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_41]], %struct.kmp_task_t_with_privates.41* [[TMP3]], i32 0, i32 1 -// CHECK2-51-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.42* [[TMP9]] to i8* -// CHECK2-51-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.41* [[TMP3]] to i8* +// CHECK2-51-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.41* +// CHECK2-51-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_42]], %struct.kmp_task_t_with_privates.42* [[TMP3]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.43* [[TMP9]] to i8* +// CHECK2-51-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.42* [[TMP3]] to i8* // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META185:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META188:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META190:![0-9]+]]) @@ -4295,10 +4310,10 @@ // CHECK2-51-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !194 // CHECK2-51-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !194 // CHECK2-51-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !194 -// CHECK2-51-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.42*, i32**)* @.omp_task_privates_map..43 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !194 +// CHECK2-51-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.43*, i32**)* @.omp_task_privates_map..43 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !194 // CHECK2-51-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !194 -// CHECK2-51-NEXT: store %struct.anon.40* [[TMP8]], %struct.anon.40** [[__CONTEXT_ADDR_I]], align 8, !noalias !194 -// CHECK2-51-NEXT: [[TMP12:%.*]] = load %struct.anon.40*, %struct.anon.40** [[__CONTEXT_ADDR_I]], align 8, !noalias !194 +// CHECK2-51-NEXT: store %struct.anon.41* [[TMP8]], %struct.anon.41** [[__CONTEXT_ADDR_I]], align 8, !noalias !194 +// CHECK2-51-NEXT: [[TMP12:%.*]] = load %struct.anon.41*, %struct.anon.41** [[__CONTEXT_ADDR_I]], align 8, !noalias !194 // CHECK2-51-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !194 // CHECK2-51-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !194 // CHECK2-51-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**)* @@ -4676,8 +4691,8 @@ // CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 // CHECK3-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 // CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK3-NEXT: store i32 15, i32* @a, align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* @a, align 4 +// CHECK3-NEXT: store i32 15, i32* @a, align 4, !noalias !12 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* @a, align 4, !noalias !12 // CHECK3-NEXT: [[CONV_I:%.*]] = trunc i32 [[TMP11]] to i8 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP10]], i32 0, i32 0 // CHECK3-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 8 @@ -4722,7 +4737,7 @@ // CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !22 // CHECK3-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !22 // CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !22 -// CHECK3-NEXT: store i32 15, i32* @a, align 4 +// CHECK3-NEXT: store i32 15, i32* @a, align 4, !noalias !22 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP10]], i32 0, i32 0 // CHECK3-NEXT: [[TMP12:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP11]], align 8 // CHECK3-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP12]], i64 0, i64 1 @@ -4783,7 +4798,7 @@ // CHECK3: .untied.jmp.1.i: // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM2_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] // CHECK3-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2_I]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR4]] -// CHECK3-NEXT: store i32 1, i32* @a, align 4 +// CHECK3-NEXT: store i32 1, i32* @a, align 4, !noalias !32 // CHECK3-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2_I]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR4]] // CHECK3-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !32 // CHECK3-NEXT: br label [[CLEANUP_I]] @@ -4844,7 +4859,7 @@ // CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i8* [[TMP14]]) #[[ATTR4]] // CHECK3-NEXT: br label [[DOTOMP_OUTLINED__5_EXIT:%.*]] // CHECK3: .untied.jmp.1.i: -// CHECK3-NEXT: store i32 1, i32* @a, align 4 +// CHECK3-NEXT: store i32 1, i32* @a, align 4, !noalias !42 // CHECK3-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !42 // CHECK3-NEXT: br label [[CLEANUP_I]] // CHECK3: cleanup.i: @@ -4904,7 +4919,7 @@ // CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i8* [[TMP14]]) #[[ATTR4]] // CHECK3-NEXT: br label [[DOTOMP_OUTLINED__7_EXIT:%.*]] // CHECK3: .untied.jmp.1.i: -// CHECK3-NEXT: store i32 1, i32* @a, align 4 +// CHECK3-NEXT: store i32 1, i32* @a, align 4, !noalias !52 // CHECK3-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !52 // CHECK3-NEXT: br label [[CLEANUP_I]] // CHECK3: cleanup.i: @@ -4946,7 +4961,7 @@ // CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !62 // CHECK3-NEXT: store %struct.anon.8* [[TMP8]], %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !62 // CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !62 -// CHECK3-NEXT: store i32 2, i32* @a, align 4 +// CHECK3-NEXT: store i32 2, i32* @a, align 4, !noalias !62 // CHECK3-NEXT: ret i32 0 // // @@ -4982,7 +4997,7 @@ // CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !72 // CHECK3-NEXT: store %struct.anon.10* [[TMP8]], %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !72 // CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !72 -// CHECK3-NEXT: store i32 2, i32* @a, align 4 +// CHECK3-NEXT: store i32 2, i32* @a, align 4, !noalias !72 // CHECK3-NEXT: ret i32 0 // // @@ -5018,7 +5033,7 @@ // CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !82 // CHECK3-NEXT: store %struct.anon.12* [[TMP8]], %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !82 // CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !82 -// CHECK3-NEXT: store i32 3, i32* @a, align 4 +// CHECK3-NEXT: store i32 3, i32* @a, align 4, !noalias !82 // CHECK3-NEXT: ret i32 0 // // @@ -5054,7 +5069,7 @@ // CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !92 // CHECK3-NEXT: store %struct.anon.14* [[TMP8]], %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !92 // CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !92 -// CHECK3-NEXT: store i32 4, i32* @a, align 4 +// CHECK3-NEXT: store i32 4, i32* @a, align 4, !noalias !92 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP10]], i32 0, i32 0 // CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP11]], align 8 // CHECK3-NEXT: store i32 5, i32* [[TMP12]], align 128 @@ -5116,7 +5131,7 @@ // CHECK3-NEXT: call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4]] // CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !102 // CHECK3-NEXT: store i32 4, i32* [[TMP16]], align 128 -// CHECK3-NEXT: store i32 4, i32* @a, align 4 +// CHECK3-NEXT: store i32 4, i32* @a, align 4, !noalias !102 // CHECK3-NEXT: ret i32 0 // // @@ -5755,8 +5770,8 @@ // CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 // CHECK4-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 // CHECK4-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK4-NEXT: store i32 15, i32* @a, align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* @a, align 4 +// CHECK4-NEXT: store i32 15, i32* @a, align 4, !noalias !12 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* @a, align 4, !noalias !12 // CHECK4-NEXT: [[CONV_I:%.*]] = trunc i32 [[TMP11]] to i8 // CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP10]], i32 0, i32 0 // CHECK4-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 8 @@ -5801,7 +5816,7 @@ // CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !22 // CHECK4-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !22 // CHECK4-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !22 -// CHECK4-NEXT: store i32 15, i32* @a, align 4 +// CHECK4-NEXT: store i32 15, i32* @a, align 4, !noalias !22 // CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP10]], i32 0, i32 0 // CHECK4-NEXT: [[TMP12:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP11]], align 8 // CHECK4-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP12]], i64 0, i64 1 @@ -5862,7 +5877,7 @@ // CHECK4: .untied.jmp.1.i: // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM2_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] // CHECK4-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2_I]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR4]] -// CHECK4-NEXT: store i32 1, i32* @a, align 4 +// CHECK4-NEXT: store i32 1, i32* @a, align 4, !noalias !32 // CHECK4-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2_I]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR4]] // CHECK4-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !32 // CHECK4-NEXT: br label [[CLEANUP_I]] @@ -5923,7 +5938,7 @@ // CHECK4-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i8* [[TMP14]]) #[[ATTR4]] // CHECK4-NEXT: br label [[DOTOMP_OUTLINED__5_EXIT:%.*]] // CHECK4: .untied.jmp.1.i: -// CHECK4-NEXT: store i32 1, i32* @a, align 4 +// CHECK4-NEXT: store i32 1, i32* @a, align 4, !noalias !42 // CHECK4-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !42 // CHECK4-NEXT: br label [[CLEANUP_I]] // CHECK4: cleanup.i: @@ -5983,7 +5998,7 @@ // CHECK4-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i8* [[TMP14]]) #[[ATTR4]] // CHECK4-NEXT: br label [[DOTOMP_OUTLINED__7_EXIT:%.*]] // CHECK4: .untied.jmp.1.i: -// CHECK4-NEXT: store i32 1, i32* @a, align 4 +// CHECK4-NEXT: store i32 1, i32* @a, align 4, !noalias !52 // CHECK4-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !52 // CHECK4-NEXT: br label [[CLEANUP_I]] // CHECK4: cleanup.i: @@ -6025,7 +6040,7 @@ // CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !62 // CHECK4-NEXT: store %struct.anon.8* [[TMP8]], %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !62 // CHECK4-NEXT: [[TMP10:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !62 -// CHECK4-NEXT: store i32 2, i32* @a, align 4 +// CHECK4-NEXT: store i32 2, i32* @a, align 4, !noalias !62 // CHECK4-NEXT: ret i32 0 // // @@ -6061,7 +6076,7 @@ // CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !72 // CHECK4-NEXT: store %struct.anon.10* [[TMP8]], %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !72 // CHECK4-NEXT: [[TMP10:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !72 -// CHECK4-NEXT: store i32 2, i32* @a, align 4 +// CHECK4-NEXT: store i32 2, i32* @a, align 4, !noalias !72 // CHECK4-NEXT: ret i32 0 // // @@ -6097,7 +6112,7 @@ // CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !82 // CHECK4-NEXT: store %struct.anon.12* [[TMP8]], %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !82 // CHECK4-NEXT: [[TMP10:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !82 -// CHECK4-NEXT: store i32 3, i32* @a, align 4 +// CHECK4-NEXT: store i32 3, i32* @a, align 4, !noalias !82 // CHECK4-NEXT: ret i32 0 // // @@ -6133,7 +6148,7 @@ // CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !92 // CHECK4-NEXT: store %struct.anon.14* [[TMP8]], %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !92 // CHECK4-NEXT: [[TMP10:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !92 -// CHECK4-NEXT: store i32 4, i32* @a, align 4 +// CHECK4-NEXT: store i32 4, i32* @a, align 4, !noalias !92 // CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP10]], i32 0, i32 0 // CHECK4-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP11]], align 8 // CHECK4-NEXT: store i32 5, i32* [[TMP12]], align 128 @@ -6195,7 +6210,7 @@ // CHECK4-NEXT: call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4]] // CHECK4-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !102 // CHECK4-NEXT: store i32 4, i32* [[TMP16]], align 128 -// CHECK4-NEXT: store i32 4, i32* @a, align 4 +// CHECK4-NEXT: store i32 4, i32* @a, align 4, !noalias !102 // CHECK4-NEXT: ret i32 0 // // @@ -6886,8 +6901,8 @@ // CHECK3-51-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 // CHECK3-51-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 // CHECK3-51-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK3-51-NEXT: store i32 15, i32* @a, align 4 -// CHECK3-51-NEXT: [[TMP11:%.*]] = load i32, i32* @a, align 4 +// CHECK3-51-NEXT: store i32 15, i32* @a, align 4, !noalias !12 +// CHECK3-51-NEXT: [[TMP11:%.*]] = load i32, i32* @a, align 4, !noalias !12 // CHECK3-51-NEXT: [[CONV_I:%.*]] = trunc i32 [[TMP11]] to i8 // CHECK3-51-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP10]], i32 0, i32 0 // CHECK3-51-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 8 @@ -6932,7 +6947,7 @@ // CHECK3-51-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !22 // CHECK3-51-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !22 // CHECK3-51-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !22 -// CHECK3-51-NEXT: store i32 15, i32* @a, align 4 +// CHECK3-51-NEXT: store i32 15, i32* @a, align 4, !noalias !22 // CHECK3-51-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP10]], i32 0, i32 0 // CHECK3-51-NEXT: [[TMP12:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP11]], align 8 // CHECK3-51-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP12]], i64 0, i64 1 @@ -6993,7 +7008,7 @@ // CHECK3-51: .untied.jmp.1.i: // CHECK3-51-NEXT: [[OMP_GLOBAL_THREAD_NUM2_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] // CHECK3-51-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2_I]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR4]] -// CHECK3-51-NEXT: store i32 1, i32* @a, align 4 +// CHECK3-51-NEXT: store i32 1, i32* @a, align 4, !noalias !32 // CHECK3-51-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2_I]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR4]] // CHECK3-51-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !32 // CHECK3-51-NEXT: br label [[CLEANUP_I]] @@ -7054,7 +7069,7 @@ // CHECK3-51-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i8* [[TMP14]]) #[[ATTR4]] // CHECK3-51-NEXT: br label [[DOTOMP_OUTLINED__5_EXIT:%.*]] // CHECK3-51: .untied.jmp.1.i: -// CHECK3-51-NEXT: store i32 1, i32* @a, align 4 +// CHECK3-51-NEXT: store i32 1, i32* @a, align 4, !noalias !42 // CHECK3-51-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !42 // CHECK3-51-NEXT: br label [[CLEANUP_I]] // CHECK3-51: cleanup.i: @@ -7114,7 +7129,7 @@ // CHECK3-51-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i8* [[TMP14]]) #[[ATTR4]] // CHECK3-51-NEXT: br label [[DOTOMP_OUTLINED__7_EXIT:%.*]] // CHECK3-51: .untied.jmp.1.i: -// CHECK3-51-NEXT: store i32 1, i32* @a, align 4 +// CHECK3-51-NEXT: store i32 1, i32* @a, align 4, !noalias !52 // CHECK3-51-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !52 // CHECK3-51-NEXT: br label [[CLEANUP_I]] // CHECK3-51: cleanup.i: @@ -7156,7 +7171,7 @@ // CHECK3-51-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !62 // CHECK3-51-NEXT: store %struct.anon.8* [[TMP8]], %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !62 // CHECK3-51-NEXT: [[TMP10:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !62 -// CHECK3-51-NEXT: store i32 2, i32* @a, align 4 +// CHECK3-51-NEXT: store i32 2, i32* @a, align 4, !noalias !62 // CHECK3-51-NEXT: ret i32 0 // // @@ -7192,7 +7207,7 @@ // CHECK3-51-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !72 // CHECK3-51-NEXT: store %struct.anon.10* [[TMP8]], %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !72 // CHECK3-51-NEXT: [[TMP10:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !72 -// CHECK3-51-NEXT: store i32 2, i32* @a, align 4 +// CHECK3-51-NEXT: store i32 2, i32* @a, align 4, !noalias !72 // CHECK3-51-NEXT: ret i32 0 // // @@ -7228,7 +7243,7 @@ // CHECK3-51-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !82 // CHECK3-51-NEXT: store %struct.anon.12* [[TMP8]], %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !82 // CHECK3-51-NEXT: [[TMP10:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !82 -// CHECK3-51-NEXT: store i32 2, i32* @a, align 4 +// CHECK3-51-NEXT: store i32 2, i32* @a, align 4, !noalias !82 // CHECK3-51-NEXT: ret i32 0 // // @@ -7264,7 +7279,7 @@ // CHECK3-51-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !92 // CHECK3-51-NEXT: store %struct.anon.14* [[TMP8]], %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !92 // CHECK3-51-NEXT: [[TMP10:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !92 -// CHECK3-51-NEXT: store i32 3, i32* @a, align 4 +// CHECK3-51-NEXT: store i32 3, i32* @a, align 4, !noalias !92 // CHECK3-51-NEXT: ret i32 0 // // @@ -7300,7 +7315,7 @@ // CHECK3-51-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !102 // CHECK3-51-NEXT: store %struct.anon.16* [[TMP8]], %struct.anon.16** [[__CONTEXT_ADDR_I]], align 8, !noalias !102 // CHECK3-51-NEXT: [[TMP10:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR_I]], align 8, !noalias !102 -// CHECK3-51-NEXT: store i32 4, i32* @a, align 4 +// CHECK3-51-NEXT: store i32 4, i32* @a, align 4, !noalias !102 // CHECK3-51-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], %struct.anon.16* [[TMP10]], i32 0, i32 0 // CHECK3-51-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP11]], align 8 // CHECK3-51-NEXT: store i32 5, i32* [[TMP12]], align 128 @@ -7362,7 +7377,7 @@ // CHECK3-51-NEXT: call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4]] // CHECK3-51-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !112 // CHECK3-51-NEXT: store i32 4, i32* [[TMP16]], align 128 -// CHECK3-51-NEXT: store i32 4, i32* @a, align 4 +// CHECK3-51-NEXT: store i32 4, i32* @a, align 4, !noalias !112 // CHECK3-51-NEXT: ret i32 0 // // @@ -8449,8 +8464,8 @@ // CHECK4-51-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 // CHECK4-51-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 // CHECK4-51-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK4-51-NEXT: store i32 15, i32* @a, align 4 -// CHECK4-51-NEXT: [[TMP11:%.*]] = load i32, i32* @a, align 4 +// CHECK4-51-NEXT: store i32 15, i32* @a, align 4, !noalias !12 +// CHECK4-51-NEXT: [[TMP11:%.*]] = load i32, i32* @a, align 4, !noalias !12 // CHECK4-51-NEXT: [[CONV_I:%.*]] = trunc i32 [[TMP11]] to i8 // CHECK4-51-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP10]], i32 0, i32 0 // CHECK4-51-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 8 @@ -8495,7 +8510,7 @@ // CHECK4-51-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !22 // CHECK4-51-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !22 // CHECK4-51-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !22 -// CHECK4-51-NEXT: store i32 15, i32* @a, align 4 +// CHECK4-51-NEXT: store i32 15, i32* @a, align 4, !noalias !22 // CHECK4-51-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP10]], i32 0, i32 0 // CHECK4-51-NEXT: [[TMP12:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP11]], align 8 // CHECK4-51-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP12]], i64 0, i64 1 @@ -8556,7 +8571,7 @@ // CHECK4-51: .untied.jmp.1.i: // CHECK4-51-NEXT: [[OMP_GLOBAL_THREAD_NUM2_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] // CHECK4-51-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2_I]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR4]] -// CHECK4-51-NEXT: store i32 1, i32* @a, align 4 +// CHECK4-51-NEXT: store i32 1, i32* @a, align 4, !noalias !32 // CHECK4-51-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2_I]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR4]] // CHECK4-51-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !32 // CHECK4-51-NEXT: br label [[CLEANUP_I]] @@ -8617,7 +8632,7 @@ // CHECK4-51-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i8* [[TMP14]]) #[[ATTR4]] // CHECK4-51-NEXT: br label [[DOTOMP_OUTLINED__5_EXIT:%.*]] // CHECK4-51: .untied.jmp.1.i: -// CHECK4-51-NEXT: store i32 1, i32* @a, align 4 +// CHECK4-51-NEXT: store i32 1, i32* @a, align 4, !noalias !42 // CHECK4-51-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !42 // CHECK4-51-NEXT: br label [[CLEANUP_I]] // CHECK4-51: cleanup.i: @@ -8677,7 +8692,7 @@ // CHECK4-51-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i8* [[TMP14]]) #[[ATTR4]] // CHECK4-51-NEXT: br label [[DOTOMP_OUTLINED__7_EXIT:%.*]] // CHECK4-51: .untied.jmp.1.i: -// CHECK4-51-NEXT: store i32 1, i32* @a, align 4 +// CHECK4-51-NEXT: store i32 1, i32* @a, align 4, !noalias !52 // CHECK4-51-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !52 // CHECK4-51-NEXT: br label [[CLEANUP_I]] // CHECK4-51: cleanup.i: @@ -8719,7 +8734,7 @@ // CHECK4-51-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !62 // CHECK4-51-NEXT: store %struct.anon.8* [[TMP8]], %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !62 // CHECK4-51-NEXT: [[TMP10:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !62 -// CHECK4-51-NEXT: store i32 2, i32* @a, align 4 +// CHECK4-51-NEXT: store i32 2, i32* @a, align 4, !noalias !62 // CHECK4-51-NEXT: ret i32 0 // // @@ -8755,7 +8770,7 @@ // CHECK4-51-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !72 // CHECK4-51-NEXT: store %struct.anon.10* [[TMP8]], %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !72 // CHECK4-51-NEXT: [[TMP10:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !72 -// CHECK4-51-NEXT: store i32 2, i32* @a, align 4 +// CHECK4-51-NEXT: store i32 2, i32* @a, align 4, !noalias !72 // CHECK4-51-NEXT: ret i32 0 // // @@ -8791,7 +8806,7 @@ // CHECK4-51-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !82 // CHECK4-51-NEXT: store %struct.anon.12* [[TMP8]], %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !82 // CHECK4-51-NEXT: [[TMP10:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !82 -// CHECK4-51-NEXT: store i32 3, i32* @a, align 4 +// CHECK4-51-NEXT: store i32 3, i32* @a, align 4, !noalias !82 // CHECK4-51-NEXT: ret i32 0 // // @@ -8827,7 +8842,7 @@ // CHECK4-51-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !92 // CHECK4-51-NEXT: store %struct.anon.14* [[TMP8]], %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !92 // CHECK4-51-NEXT: [[TMP10:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !92 -// CHECK4-51-NEXT: store i32 4, i32* @a, align 4 +// CHECK4-51-NEXT: store i32 4, i32* @a, align 4, !noalias !92 // CHECK4-51-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP10]], i32 0, i32 0 // CHECK4-51-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP11]], align 8 // CHECK4-51-NEXT: store i32 5, i32* [[TMP12]], align 128 @@ -8889,7 +8904,7 @@ // CHECK4-51-NEXT: call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4]] // CHECK4-51-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !102 // CHECK4-51-NEXT: store i32 4, i32* [[TMP16]], align 128 -// CHECK4-51-NEXT: store i32 4, i32* @a, align 4 +// CHECK4-51-NEXT: store i32 4, i32* @a, align 4, !noalias !102 // CHECK4-51-NEXT: ret i32 0 // // diff --git a/clang/test/OpenMP/task_if_codegen.cpp b/clang/test/OpenMP/task_if_codegen.cpp --- a/clang/test/OpenMP/task_if_codegen.cpp +++ b/clang/test/OpenMP/task_if_codegen.cpp @@ -90,26 +90,30 @@ // CHECK1-LABEL: define {{[^@]+}}@_Z9gtid_testv // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.kmp_task_t_with_privates* -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0 -// CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) -// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @.omp_task_entry.(i32 [[TMP1]], %struct.kmp_task_t_with_privates* [[TMP3]]) #[[ATTR3:[0-9]+]] -// CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct.kmp_task_t_with_privates* +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* [[TMP3]]) +// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @.omp_task_entry.(i32 [[TMP2]], %struct.kmp_task_t_with_privates* [[TMP4]]) #[[ATTR3:[0-9]+]] +// CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* [[TMP3]]) // CHECK1-NEXT: ret void // // @@ -121,7 +125,7 @@ // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 @@ -132,7 +136,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) @@ -143,8 +147,8 @@ // CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: call void @_Z9gtid_testv() #[[ATTR3]] // CHECK1-NEXT: ret i32 0 // @@ -153,27 +157,27 @@ // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 -// CHECK1-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 -// CHECK1-NEXT: [[AGG_CAPTURED2:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 1 -// CHECK1-NEXT: [[AGG_CAPTURED3:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED2:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED3:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK1-NEXT: [[DOTDEP_ARR_ADDR:%.*]] = alloca [1 x %struct.kmp_depend_info], align 8 // CHECK1-NEXT: [[DEP_COUNTER_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.1*)* @.omp_task_entry..3 to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.kmp_task_t_with_privates.1* -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], %struct.kmp_task_t_with_privates.1* [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.2*)* @.omp_task_entry..3 to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.kmp_task_t_with_privates.2* +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP1]]) -// CHECK1-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.3*)* @.omp_task_entry..5 to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates.3* -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], %struct.kmp_task_t_with_privates.3* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.4*)* @.omp_task_entry..5 to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates.4* +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], %struct.kmp_task_t_with_privates.4* [[TMP6]], i32 0, i32 0 // CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP5]]) -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @.omp_task_entry..5(i32 [[TMP0]], %struct.kmp_task_t_with_privates.3* [[TMP6]]) #[[ATTR3]] +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @.omp_task_entry..5(i32 [[TMP0]], %struct.kmp_task_t_with_privates.4* [[TMP6]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP5]]) -// CHECK1-NEXT: [[TMP9:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.5*)* @.omp_task_entry..7 to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to %struct.kmp_task_t_with_privates.5* -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], %struct.kmp_task_t_with_privates.5* [[TMP10]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.6*)* @.omp_task_entry..7 to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to %struct.kmp_task_t_with_privates.6* +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], %struct.kmp_task_t_with_privates.6* [[TMP10]], i32 0, i32 0 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* @Arg, align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] @@ -182,13 +186,13 @@ // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP9]]) -// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @.omp_task_entry..7(i32 [[TMP0]], %struct.kmp_task_t_with_privates.5* [[TMP10]]) #[[ATTR3]] +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @.omp_task_entry..7(i32 [[TMP0]], %struct.kmp_task_t_with_privates.6* [[TMP10]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP9]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.7*)* @.omp_task_entry..9 to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to %struct.kmp_task_t_with_privates.7* -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_7:%.*]], %struct.kmp_task_t_with_privates.7* [[TMP16]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.8*)* @.omp_task_entry..9 to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to %struct.kmp_task_t_with_privates.8* +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_8:%.*]], %struct.kmp_task_t_with_privates.8* [[TMP16]], i32 0, i32 0 // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR]], i64 0, i64 0 // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO:%.*]], %struct.kmp_depend_info* [[TMP18]], i64 0 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP19]], i32 0, i32 0 @@ -208,7 +212,7 @@ // CHECK1: omp_if.else6: // CHECK1-NEXT: call void @__kmpc_omp_wait_deps(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i8* [[TMP23]], i32 0, i8* null) // CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP15]]) -// CHECK1-NEXT: [[TMP26:%.*]] = call i32 @.omp_task_entry..9(i32 [[TMP0]], %struct.kmp_task_t_with_privates.7* [[TMP16]]) #[[ATTR3]] +// CHECK1-NEXT: [[TMP26:%.*]] = call i32 @.omp_task_entry..9(i32 [[TMP0]], %struct.kmp_task_t_with_privates.8* [[TMP16]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP15]]) // CHECK1-NEXT: br label [[OMP_IF_END7]] // CHECK1: omp_if.end7: @@ -218,26 +222,26 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..3 -// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.1* noalias noundef [[TMP1:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.2* noalias noundef [[TMP1:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.1*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.1* [[TMP1]], %struct.kmp_task_t_with_privates.1** [[DOTADDR1]], align 8 +// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP1]], %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.1*, %struct.kmp_task_t_with_privates.1** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], %struct.kmp_task_t_with_privates.1* [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.1* [[TMP3]] to i8* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.1* +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.2* [[TMP3]] to i8* // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) @@ -247,33 +251,33 @@ // CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !24 // CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !24 // CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !24 -// CHECK1-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: store %struct.anon.1* [[TMP8]], %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 // CHECK1-NEXT: call void @_Z3fn7v() #[[ATTR3]] // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..5 -// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.3* noalias noundef [[TMP1:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.4* noalias noundef [[TMP1:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.3*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.4*, align 8 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.3* [[TMP1]], %struct.kmp_task_t_with_privates.3** [[DOTADDR1]], align 8 +// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.4* [[TMP1]], %struct.kmp_task_t_with_privates.4** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.3*, %struct.kmp_task_t_with_privates.3** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], %struct.kmp_task_t_with_privates.3* [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.4*, %struct.kmp_task_t_with_privates.4** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], %struct.kmp_task_t_with_privates.4* [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.2* -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.3* [[TMP3]] to i8* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.3* +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.4* [[TMP3]] to i8* // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META28:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META30:![0-9]+]]) @@ -283,33 +287,33 @@ // CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !34 // CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !34 // CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !34 -// CHECK1-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !34 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !34 +// CHECK1-NEXT: store %struct.anon.3* [[TMP8]], %struct.anon.3** [[__CONTEXT_ADDR_I]], align 8, !noalias !34 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR_I]], align 8, !noalias !34 // CHECK1-NEXT: call void @_Z3fn8v() #[[ATTR3]] // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..7 -// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.5* noalias noundef [[TMP1:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.6* noalias noundef [[TMP1:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.5*, align 8 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.5*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.6*, align 8 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.5* [[TMP1]], %struct.kmp_task_t_with_privates.5** [[DOTADDR1]], align 8 +// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.6* [[TMP1]], %struct.kmp_task_t_with_privates.6** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.5*, %struct.kmp_task_t_with_privates.5** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], %struct.kmp_task_t_with_privates.5* [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.6*, %struct.kmp_task_t_with_privates.6** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], %struct.kmp_task_t_with_privates.6* [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.4* -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.5* [[TMP3]] to i8* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.5* +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.6* [[TMP3]] to i8* // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META35:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META38:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META40:![0-9]+]]) @@ -319,33 +323,33 @@ // CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !44 // CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !44 // CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !44 -// CHECK1-NEXT: store %struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !44 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !44 +// CHECK1-NEXT: store %struct.anon.5* [[TMP8]], %struct.anon.5** [[__CONTEXT_ADDR_I]], align 8, !noalias !44 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR_I]], align 8, !noalias !44 // CHECK1-NEXT: call void @_Z3fn9v() #[[ATTR3]] // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..9 -// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.7* noalias noundef [[TMP1:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.8* noalias noundef [[TMP1:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.6*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.7*, align 8 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.7*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.8*, align 8 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.7* [[TMP1]], %struct.kmp_task_t_with_privates.7** [[DOTADDR1]], align 8 +// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.8* [[TMP1]], %struct.kmp_task_t_with_privates.8** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.7*, %struct.kmp_task_t_with_privates.7** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_7:%.*]], %struct.kmp_task_t_with_privates.7* [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.8*, %struct.kmp_task_t_with_privates.8** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_8:%.*]], %struct.kmp_task_t_with_privates.8* [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.6* -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.7* [[TMP3]] to i8* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.7* +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.8* [[TMP3]] to i8* // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META45:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META48:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META50:![0-9]+]]) @@ -355,8 +359,8 @@ // CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !54 // CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !54 // CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !54 -// CHECK1-NEXT: store %struct.anon.6* [[TMP8]], %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !54 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !54 +// CHECK1-NEXT: store %struct.anon.7* [[TMP8]], %struct.anon.7** [[__CONTEXT_ADDR_I]], align 8, !noalias !54 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR_I]], align 8, !noalias !54 // CHECK1-NEXT: call void @_Z4fn10v() #[[ATTR3]] // CHECK1-NEXT: ret i32 0 // @@ -365,33 +369,33 @@ // CHECK1-SAME: (i32 noundef [[ARG:%.*]]) #[[ATTR0]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 1 -// CHECK1-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 1 -// CHECK1-NEXT: [[AGG_CAPTURED2:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 1 -// CHECK1-NEXT: [[AGG_CAPTURED3:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED2:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED3:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 1 // CHECK1-NEXT: [[DOTDEP_ARR_ADDR:%.*]] = alloca [1 x %struct.kmp_depend_info], align 8 // CHECK1-NEXT: [[DEP_COUNTER_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED8:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED8:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 1 // CHECK1-NEXT: [[DOTDEP_ARR_ADDR9:%.*]] = alloca [1 x %struct.kmp_depend_info], align 8 // CHECK1-NEXT: [[DEP_COUNTER_ADDR10:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED15:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED15:%.*]] = alloca [[STRUCT_ANON_19:%.*]], align 1 // CHECK1-NEXT: [[DOTDEP_ARR_ADDR16:%.*]] = alloca [1 x %struct.kmp_depend_info], align 8 // CHECK1-NEXT: [[DEP_COUNTER_ADDR17:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i32 [[ARG]], i32* [[ARG_ADDR]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.9*)* @.omp_task_entry..11 to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.kmp_task_t_with_privates.9* -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_9:%.*]], %struct.kmp_task_t_with_privates.9* [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.10*)* @.omp_task_entry..11 to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.kmp_task_t_with_privates.10* +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_10:%.*]], %struct.kmp_task_t_with_privates.10* [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP1]]) -// CHECK1-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.11*)* @.omp_task_entry..13 to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates.11* -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_11:%.*]], %struct.kmp_task_t_with_privates.11* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.12*)* @.omp_task_entry..13 to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates.12* +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_12:%.*]], %struct.kmp_task_t_with_privates.12* [[TMP6]], i32 0, i32 0 // CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP5]]) -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @.omp_task_entry..13(i32 [[TMP0]], %struct.kmp_task_t_with_privates.11* [[TMP6]]) #[[ATTR3]] +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @.omp_task_entry..13(i32 [[TMP0]], %struct.kmp_task_t_with_privates.12* [[TMP6]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP5]]) -// CHECK1-NEXT: [[TMP9:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.13*)* @.omp_task_entry..15 to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to %struct.kmp_task_t_with_privates.13* -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_13:%.*]], %struct.kmp_task_t_with_privates.13* [[TMP10]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.14*)* @.omp_task_entry..15 to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to %struct.kmp_task_t_with_privates.14* +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_14:%.*]], %struct.kmp_task_t_with_privates.14* [[TMP10]], i32 0, i32 0 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARG_ADDR]], align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] @@ -400,13 +404,13 @@ // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP9]]) -// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @.omp_task_entry..15(i32 [[TMP0]], %struct.kmp_task_t_with_privates.13* [[TMP10]]) #[[ATTR3]] +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @.omp_task_entry..15(i32 [[TMP0]], %struct.kmp_task_t_with_privates.14* [[TMP10]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP9]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.15*)* @.omp_task_entry..17 to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to %struct.kmp_task_t_with_privates.15* -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_15:%.*]], %struct.kmp_task_t_with_privates.15* [[TMP16]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.16*)* @.omp_task_entry..17 to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to %struct.kmp_task_t_with_privates.16* +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_16:%.*]], %struct.kmp_task_t_with_privates.16* [[TMP16]], i32 0, i32 0 // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR]], i64 0, i64 0 // CHECK1-NEXT: [[TMP19:%.*]] = ptrtoint i32* [[ARG_ADDR]] to i64 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO:%.*]], %struct.kmp_depend_info* [[TMP18]], i64 0 @@ -427,13 +431,13 @@ // CHECK1: omp_if.else6: // CHECK1-NEXT: call void @__kmpc_omp_wait_deps(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i8* [[TMP24]], i32 0, i8* null) // CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP15]]) -// CHECK1-NEXT: [[TMP27:%.*]] = call i32 @.omp_task_entry..17(i32 [[TMP0]], %struct.kmp_task_t_with_privates.15* [[TMP16]]) #[[ATTR3]] +// CHECK1-NEXT: [[TMP27:%.*]] = call i32 @.omp_task_entry..17(i32 [[TMP0]], %struct.kmp_task_t_with_privates.16* [[TMP16]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP15]]) // CHECK1-NEXT: br label [[OMP_IF_END7]] // CHECK1: omp_if.end7: -// CHECK1-NEXT: [[TMP28:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.17*)* @.omp_task_entry..19 to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP29:%.*]] = bitcast i8* [[TMP28]] to %struct.kmp_task_t_with_privates.17* -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_17:%.*]], %struct.kmp_task_t_with_privates.17* [[TMP29]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP28:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.18*)* @.omp_task_entry..19 to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP29:%.*]] = bitcast i8* [[TMP28]] to %struct.kmp_task_t_with_privates.18* +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_18:%.*]], %struct.kmp_task_t_with_privates.18* [[TMP29]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR9]], i64 0, i64 0 // CHECK1-NEXT: [[TMP32:%.*]] = ptrtoint i32* [[ARG_ADDR]] to i64 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP31]], i64 0 @@ -454,13 +458,13 @@ // CHECK1: omp_if.else13: // CHECK1-NEXT: call void @__kmpc_omp_wait_deps(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i8* [[TMP37]], i32 0, i8* null) // CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP28]]) -// CHECK1-NEXT: [[TMP40:%.*]] = call i32 @.omp_task_entry..19(i32 [[TMP0]], %struct.kmp_task_t_with_privates.17* [[TMP29]]) #[[ATTR3]] +// CHECK1-NEXT: [[TMP40:%.*]] = call i32 @.omp_task_entry..19(i32 [[TMP0]], %struct.kmp_task_t_with_privates.18* [[TMP29]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP28]]) // CHECK1-NEXT: br label [[OMP_IF_END14]] // CHECK1: omp_if.end14: -// CHECK1-NEXT: [[TMP41:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.19*)* @.omp_task_entry..21 to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP42:%.*]] = bitcast i8* [[TMP41]] to %struct.kmp_task_t_with_privates.19* -// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_19:%.*]], %struct.kmp_task_t_with_privates.19* [[TMP42]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP41:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.20*)* @.omp_task_entry..21 to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP42:%.*]] = bitcast i8* [[TMP41]] to %struct.kmp_task_t_with_privates.20* +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_20:%.*]], %struct.kmp_task_t_with_privates.20* [[TMP42]], i32 0, i32 0 // CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR16]], i64 0, i64 0 // CHECK1-NEXT: [[TMP45:%.*]] = ptrtoint i32* [[ARG_ADDR]] to i64 // CHECK1-NEXT: [[TMP46:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP44]], i64 0 @@ -481,7 +485,7 @@ // CHECK1: omp_if.else20: // CHECK1-NEXT: call void @__kmpc_omp_wait_deps(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i8* [[TMP50]], i32 0, i8* null) // CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP41]]) -// CHECK1-NEXT: [[TMP53:%.*]] = call i32 @.omp_task_entry..21(i32 [[TMP0]], %struct.kmp_task_t_with_privates.19* [[TMP42]]) #[[ATTR3]] +// CHECK1-NEXT: [[TMP53:%.*]] = call i32 @.omp_task_entry..21(i32 [[TMP0]], %struct.kmp_task_t_with_privates.20* [[TMP42]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP41]]) // CHECK1-NEXT: br label [[OMP_IF_END21]] // CHECK1: omp_if.end21: @@ -489,26 +493,26 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..11 -// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.9* noalias noundef [[TMP1:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.10* noalias noundef [[TMP1:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.9*, align 8 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.9*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.10*, align 8 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.9* [[TMP1]], %struct.kmp_task_t_with_privates.9** [[DOTADDR1]], align 8 +// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.10* [[TMP1]], %struct.kmp_task_t_with_privates.10** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.9*, %struct.kmp_task_t_with_privates.9** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_9:%.*]], %struct.kmp_task_t_with_privates.9* [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.10*, %struct.kmp_task_t_with_privates.10** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_10:%.*]], %struct.kmp_task_t_with_privates.10* [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.8* -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.9* [[TMP3]] to i8* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.9* +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.10* [[TMP3]] to i8* // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META55:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META58:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META60:![0-9]+]]) @@ -518,33 +522,33 @@ // CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !64 // CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !64 // CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !64 -// CHECK1-NEXT: store %struct.anon.8* [[TMP8]], %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !64 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !64 +// CHECK1-NEXT: store %struct.anon.9* [[TMP8]], %struct.anon.9** [[__CONTEXT_ADDR_I]], align 8, !noalias !64 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR_I]], align 8, !noalias !64 // CHECK1-NEXT: call void @_Z3fn1v() #[[ATTR3]] // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..13 -// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.11* noalias noundef [[TMP1:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.12* noalias noundef [[TMP1:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.10*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.11*, align 8 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.11*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.12*, align 8 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.11* [[TMP1]], %struct.kmp_task_t_with_privates.11** [[DOTADDR1]], align 8 +// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.12* [[TMP1]], %struct.kmp_task_t_with_privates.12** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.11*, %struct.kmp_task_t_with_privates.11** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_11:%.*]], %struct.kmp_task_t_with_privates.11* [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.12*, %struct.kmp_task_t_with_privates.12** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_12:%.*]], %struct.kmp_task_t_with_privates.12* [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.10* -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.11* [[TMP3]] to i8* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.11* +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.12* [[TMP3]] to i8* // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META65:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META68:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META70:![0-9]+]]) @@ -554,33 +558,33 @@ // CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !74 // CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !74 // CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !74 -// CHECK1-NEXT: store %struct.anon.10* [[TMP8]], %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !74 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !74 +// CHECK1-NEXT: store %struct.anon.11* [[TMP8]], %struct.anon.11** [[__CONTEXT_ADDR_I]], align 8, !noalias !74 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR_I]], align 8, !noalias !74 // CHECK1-NEXT: call void @_Z3fn2v() #[[ATTR3]] // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..15 -// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.13* noalias noundef [[TMP1:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.14* noalias noundef [[TMP1:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.12*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.13*, align 8 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.13*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.14*, align 8 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.13* [[TMP1]], %struct.kmp_task_t_with_privates.13** [[DOTADDR1]], align 8 +// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.14* [[TMP1]], %struct.kmp_task_t_with_privates.14** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.13*, %struct.kmp_task_t_with_privates.13** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_13:%.*]], %struct.kmp_task_t_with_privates.13* [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.14*, %struct.kmp_task_t_with_privates.14** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_14:%.*]], %struct.kmp_task_t_with_privates.14* [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.12* -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.13* [[TMP3]] to i8* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.13* +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.14* [[TMP3]] to i8* // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META75:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META78:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META80:![0-9]+]]) @@ -590,33 +594,33 @@ // CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !84 // CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !84 // CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !84 -// CHECK1-NEXT: store %struct.anon.12* [[TMP8]], %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !84 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !84 +// CHECK1-NEXT: store %struct.anon.13* [[TMP8]], %struct.anon.13** [[__CONTEXT_ADDR_I]], align 8, !noalias !84 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR_I]], align 8, !noalias !84 // CHECK1-NEXT: call void @_Z3fn3v() #[[ATTR3]] // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..17 -// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.15* noalias noundef [[TMP1:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.16* noalias noundef [[TMP1:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.14*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.15*, align 8 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.15*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.16*, align 8 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.15* [[TMP1]], %struct.kmp_task_t_with_privates.15** [[DOTADDR1]], align 8 +// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.16* [[TMP1]], %struct.kmp_task_t_with_privates.16** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.15*, %struct.kmp_task_t_with_privates.15** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_15:%.*]], %struct.kmp_task_t_with_privates.15* [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.16*, %struct.kmp_task_t_with_privates.16** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_16:%.*]], %struct.kmp_task_t_with_privates.16* [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.14* -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.15* [[TMP3]] to i8* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.15* +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.16* [[TMP3]] to i8* // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META85:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META88:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META90:![0-9]+]]) @@ -626,33 +630,33 @@ // CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !94 // CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !94 // CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !94 -// CHECK1-NEXT: store %struct.anon.14* [[TMP8]], %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !94 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !94 +// CHECK1-NEXT: store %struct.anon.15* [[TMP8]], %struct.anon.15** [[__CONTEXT_ADDR_I]], align 8, !noalias !94 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.15*, %struct.anon.15** [[__CONTEXT_ADDR_I]], align 8, !noalias !94 // CHECK1-NEXT: call void @_Z3fn4v() #[[ATTR3]] // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..19 -// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.17* noalias noundef [[TMP1:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.18* noalias noundef [[TMP1:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.16*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.17*, align 8 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.17*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.18*, align 8 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.17* [[TMP1]], %struct.kmp_task_t_with_privates.17** [[DOTADDR1]], align 8 +// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.18* [[TMP1]], %struct.kmp_task_t_with_privates.18** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.17*, %struct.kmp_task_t_with_privates.17** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_17:%.*]], %struct.kmp_task_t_with_privates.17* [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.18*, %struct.kmp_task_t_with_privates.18** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_18:%.*]], %struct.kmp_task_t_with_privates.18* [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.16* -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.17* [[TMP3]] to i8* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.17* +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.18* [[TMP3]] to i8* // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META95:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META98:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META100:![0-9]+]]) @@ -662,33 +666,33 @@ // CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !104 // CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !104 // CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !104 -// CHECK1-NEXT: store %struct.anon.16* [[TMP8]], %struct.anon.16** [[__CONTEXT_ADDR_I]], align 8, !noalias !104 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR_I]], align 8, !noalias !104 +// CHECK1-NEXT: store %struct.anon.17* [[TMP8]], %struct.anon.17** [[__CONTEXT_ADDR_I]], align 8, !noalias !104 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.17*, %struct.anon.17** [[__CONTEXT_ADDR_I]], align 8, !noalias !104 // CHECK1-NEXT: call void @_Z3fn5v() #[[ATTR3]] // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..21 -// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.19* noalias noundef [[TMP1:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.20* noalias noundef [[TMP1:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.18*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.19*, align 8 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.19*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.20*, align 8 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.19* [[TMP1]], %struct.kmp_task_t_with_privates.19** [[DOTADDR1]], align 8 +// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.20* [[TMP1]], %struct.kmp_task_t_with_privates.20** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.19*, %struct.kmp_task_t_with_privates.19** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_19:%.*]], %struct.kmp_task_t_with_privates.19* [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.20*, %struct.kmp_task_t_with_privates.20** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_20:%.*]], %struct.kmp_task_t_with_privates.20* [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.18* -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.19* [[TMP3]] to i8* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.19* +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.20* [[TMP3]] to i8* // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META105:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META108:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META110:![0-9]+]]) @@ -698,8 +702,8 @@ // CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !114 // CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !114 // CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !114 -// CHECK1-NEXT: store %struct.anon.18* [[TMP8]], %struct.anon.18** [[__CONTEXT_ADDR_I]], align 8, !noalias !114 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.18*, %struct.anon.18** [[__CONTEXT_ADDR_I]], align 8, !noalias !114 +// CHECK1-NEXT: store %struct.anon.19* [[TMP8]], %struct.anon.19** [[__CONTEXT_ADDR_I]], align 8, !noalias !114 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.19*, %struct.anon.19** [[__CONTEXT_ADDR_I]], align 8, !noalias !114 // CHECK1-NEXT: call void @_Z3fn6v() #[[ATTR3]] // CHECK1-NEXT: ret i32 0 // diff --git a/clang/test/OpenMP/task_in_reduction_codegen.cpp b/clang/test/OpenMP/task_in_reduction_codegen.cpp --- a/clang/test/OpenMP/task_in_reduction_codegen.cpp +++ b/clang/test/OpenMP/task_in_reduction_codegen.cpp @@ -68,7 +68,8 @@ // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_3:%.*]] = alloca [2 x %struct.kmp_taskred_input_t.0], align 8 // CHECK1-NEXT: [[DOTTASK_RED_6:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 @@ -188,34 +189,44 @@ // CHECK1-NEXT: [[TMP57:%.*]] = bitcast [2 x %struct.kmp_taskred_input_t.0]* [[DOTRD_INPUT_3]] to i8* // CHECK1-NEXT: [[TMP58:%.*]] = call i8* @__kmpc_taskred_init(i32 [[TMP0]], i32 2, i8* [[TMP57]]) // CHECK1-NEXT: store i8* [[TMP58]], i8** [[DOTTASK_RED_6]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i16*, i8**, i8**)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[A]], i64 [[TMP2]], i16* [[VLA]], i8** [[DOTTASK_RED_]], i8** [[DOTTASK_RED_6]]) +// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[A]], i32** [[TMP59]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[TMP60]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i16* [[VLA]], i16** [[TMP61]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i8** [[DOTTASK_RED_]], i8*** [[TMP62]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store i8** [[DOTTASK_RED_6]], i8*** [[TMP63]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store i32* [[A]], i32** [[TMP59]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.2*)* @.omp_task_entry..11 to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP61:%.*]] = bitcast i8* [[TMP60]] to %struct.kmp_task_t_with_privates.2* -// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP61]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP62]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP64:%.*]] = load i8*, i8** [[TMP63]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = bitcast %struct.anon.1* [[AGG_CAPTURED]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP64]], i8* align 8 [[TMP65]], i64 8, i1 false) -// CHECK1-NEXT: [[TMP66:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP60]]) +// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[A]], i32** [[TMP64]], align 8 +// CHECK1-NEXT: [[TMP65:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.3*)* @.omp_task_entry..11 to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP66:%.*]] = bitcast i8* [[TMP65]] to %struct.kmp_task_t_with_privates.3* +// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], %struct.kmp_task_t_with_privates.3* [[TMP66]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP67]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP69:%.*]] = load i8*, i8** [[TMP68]], align 8 +// CHECK1-NEXT: [[TMP70:%.*]] = bitcast %struct.anon.2* [[AGG_CAPTURED]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP69]], i8* align 8 [[TMP70]], i64 8, i1 false) +// CHECK1-NEXT: [[TMP71:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP65]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: [[TMP67:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP67]]) +// CHECK1-NEXT: [[TMP72:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP72]]) // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [5 x %struct.S], [5 x %struct.S]* [[C]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i64 5 +// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i64 5 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP68]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP73]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SD1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done8: -// CHECK1-NEXT: [[TMP69:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP69]] +// CHECK1-NEXT: [[TMP74:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP74]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SC1Ev @@ -492,55 +503,53 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i64 [[VLA:%.*]], i16* nonnull align 2 dereferenceable(2) [[D:%.*]], i8** nonnull align 8 dereferenceable(8) [[DOTTASK_RED_:%.*]], i8** nonnull align 8 dereferenceable(8) [[DOTTASK_RED_1:%.*]]) #[[ATTR8:[0-9]+]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR8:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i16*, align 8 -// CHECK1-NEXT: [[DOTTASK_RED__ADDR:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: [[DOTTASK_RED__ADDR2:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i16* [[D]], i16** [[D_ADDR]], align 8 -// CHECK1-NEXT: store i8** [[DOTTASK_RED_]], i8*** [[DOTTASK_RED__ADDR]], align 8 -// CHECK1-NEXT: store i8** [[DOTTASK_RED_1]], i8*** [[DOTTASK_RED__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16*, i16** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8**, i8*** [[DOTTASK_RED__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[DOTTASK_RED__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store i32* [[TMP0]], i32** [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: store i16* [[TMP2]], i16** [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK1-NEXT: store i8** [[TMP3]], i8*** [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 4 -// CHECK1-NEXT: store i8** [[TMP4]], i8*** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 1, i64 56, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to %struct.kmp_task_t_with_privates* -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP13]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP14]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP16]], i8* align 8 [[TMP17]], i64 40, i1 false) -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP13]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP18]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP20:%.*]] = load i8*, i8** [[TMP3]], align 8 -// CHECK1-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP18]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP4]], align 8 -// CHECK1-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i8* [[TMP12]]) +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16*, i16** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8**, i8*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i8**, i8*** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[TMP2]], i32** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: store i16* [[TMP6]], i16** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 3 +// CHECK1-NEXT: store i8** [[TMP8]], i8*** [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 4 +// CHECK1-NEXT: store i8** [[TMP10]], i8*** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 1, i64 56, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP18]] to %struct.kmp_task_t_with_privates* +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP19]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast %struct.anon.1* [[AGG_CAPTURED]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP22]], i8* align 8 [[TMP23]], i64 40, i1 false) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP19]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP24]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP26:%.*]] = load i8*, i8** [[TMP8]], align 8 +// CHECK1-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP24]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i8*, i8** [[TMP10]], align 8 +// CHECK1-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i8* [[TMP18]]) // CHECK1-NEXT: ret void // // @@ -571,7 +580,7 @@ // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 @@ -584,7 +593,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.1* // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -597,9 +606,9 @@ // CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***, i8***)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 +// CHECK1-NEXT: store %struct.anon.1* [[TMP8]], %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP13]], align 8 // CHECK1-NEXT: [[TMP15:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP16:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 @@ -607,14 +616,14 @@ // CHECK1-NEXT: call void [[TMP17]](i8* [[TMP16]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]]) #[[ATTR3]] // CHECK1-NEXT: [[TMP18:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP19:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP12]], i32 0, i32 0 // CHECK1-NEXT: [[TMP21:%.*]] = load i32*, i32** [[TMP20]], align 8 // CHECK1-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP18]], align 8 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP21]] to i8* // CHECK1-NEXT: [[TMP25:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP23]], i8* [[TMP22]], i8* [[TMP24]]) #[[ATTR3]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP25]] to i32* -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP27:%.*]] = load i16*, i16** [[TMP26]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = mul nuw i64 [[TMP14]], 2 // CHECK1-NEXT: [[TMP29:%.*]] = udiv exact i64 [[TMP28]], ptrtoint (i16* getelementptr (i16, i16* null, i32 1) to i64) @@ -637,26 +646,26 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_entry..11 -// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.2* noalias [[TMP1:%.*]]) #[[ATTR5]] { +// CHECK1-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates.3* noalias [[TMP1:%.*]]) #[[ATTR5]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.3*, align 8 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP1]], %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 +// CHECK1-NEXT: store %struct.kmp_task_t_with_privates.3* [[TMP1]], %struct.kmp_task_t_with_privates.3** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.3*, %struct.kmp_task_t_with_privates.3** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], %struct.kmp_task_t_with_privates.3* [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.1* -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.2* [[TMP3]] to i8* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.2* +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.3* [[TMP3]] to i8* // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) @@ -666,9 +675,9 @@ // CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !24 // CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !24 // CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !24 -// CHECK1-NEXT: store %struct.anon.1* [[TMP8]], %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP10]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP10]], i32 0, i32 0 // CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP11]], align 8 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 // CHECK1-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP12]] to i8* diff --git a/clang/test/OpenMP/taskgroup_codegen.cpp b/clang/test/OpenMP/taskgroup_codegen.cpp --- a/clang/test/OpenMP/taskgroup_codegen.cpp +++ b/clang/test/OpenMP/taskgroup_codegen.cpp @@ -72,30 +72,34 @@ // CHECK1-LABEL: define {{[^@]+}}@_Z18parallel_taskgroupv // CHECK1-SAME: () #[[ATTR6:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR7:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR7:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: invoke void @_Z3foov() // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP2:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP3:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP3]]) #[[ATTR8]] +// CHECK1-NEXT: [[TMP4:%.*]] = extractvalue { i8*, i32 } [[TMP3]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP4]]) #[[ATTR8]] // CHECK1-NEXT: unreachable // // @@ -142,29 +146,33 @@ // DEBUG1-LABEL: define {{[^@]+}}@_Z18parallel_taskgroupv // DEBUG1-SAME: () #[[ATTR6:[0-9]+]] !dbg [[DBG20:![0-9]+]] { // DEBUG1-NEXT: entry: -// DEBUG1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB7:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)), !dbg [[DBG21:![0-9]+]] +// DEBUG1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// DEBUG1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB7:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG21:![0-9]+]] // DEBUG1-NEXT: ret void, !dbg [[DBG22:![0-9]+]] // // // DEBUG1-LABEL: define {{[^@]+}}@.omp_outlined. -// DEBUG1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR7:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG23:![0-9]+]] { +// DEBUG1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR7:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG23:![0-9]+]] { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // DEBUG1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// DEBUG1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // DEBUG1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // DEBUG1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// DEBUG1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG24:![0-9]+]] -// DEBUG1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4, !dbg [[DBG24]] -// DEBUG1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB5:[0-9]+]], i32 [[TMP1]]), !dbg [[DBG24]] +// DEBUG1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// DEBUG1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8, !dbg [[DBG24:![0-9]+]] +// DEBUG1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG25:![0-9]+]] +// DEBUG1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4, !dbg [[DBG25]] +// DEBUG1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB5:[0-9]+]], i32 [[TMP2]]), !dbg [[DBG25]] // DEBUG1-NEXT: invoke void @_Z3foov() -// DEBUG1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG25:![0-9]+]] +// DEBUG1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG26:![0-9]+]] // DEBUG1: invoke.cont: -// DEBUG1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB5]], i32 [[TMP1]]), !dbg [[DBG25]] -// DEBUG1-NEXT: ret void, !dbg [[DBG26:![0-9]+]] +// DEBUG1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB5]], i32 [[TMP2]]), !dbg [[DBG26]] +// DEBUG1-NEXT: ret void, !dbg [[DBG27:![0-9]+]] // DEBUG1: terminate.lpad: -// DEBUG1-NEXT: [[TMP2:%.*]] = landingpad { i8*, i32 } -// DEBUG1-NEXT: catch i8* null, !dbg [[DBG25]] -// DEBUG1-NEXT: [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 0, !dbg [[DBG25]] -// DEBUG1-NEXT: call void @__clang_call_terminate(i8* [[TMP3]]) #[[ATTR8]], !dbg [[DBG25]] -// DEBUG1-NEXT: unreachable, !dbg [[DBG25]] +// DEBUG1-NEXT: [[TMP3:%.*]] = landingpad { i8*, i32 } +// DEBUG1-NEXT: catch i8* null, !dbg [[DBG26]] +// DEBUG1-NEXT: [[TMP4:%.*]] = extractvalue { i8*, i32 } [[TMP3]], 0, !dbg [[DBG26]] +// DEBUG1-NEXT: call void @__clang_call_terminate(i8* [[TMP4]]) #[[ATTR8]], !dbg [[DBG26]] +// DEBUG1-NEXT: unreachable, !dbg [[DBG26]] // diff --git a/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp b/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp --- a/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp +++ b/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp @@ -55,6 +55,7 @@ // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_3:%.*]] = alloca [2 x %struct.kmp_taskred_input_t.0], align 8 // CHECK1-NEXT: [[DOTTASK_RED_6:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 @@ -174,24 +175,34 @@ // CHECK1-NEXT: [[TMP57:%.*]] = bitcast [2 x %struct.kmp_taskred_input_t.0]* [[DOTRD_INPUT_3]] to i8* // CHECK1-NEXT: [[TMP58:%.*]] = call i8* @__kmpc_taskred_init(i32 [[TMP0]], i32 2, i8* [[TMP57]]) // CHECK1-NEXT: store i8* [[TMP58]], i8** [[DOTTASK_RED_6]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i16*, i8**, i8**)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[A]], i64 [[TMP2]], i16* [[VLA]], i8** [[DOTTASK_RED_]], i8** [[DOTTASK_RED_6]]) +// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[A]], i32** [[TMP59]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[TMP60]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i16* [[VLA]], i16** [[TMP61]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i8** [[DOTTASK_RED_]], i8*** [[TMP62]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store i8** [[DOTTASK_RED_6]], i8*** [[TMP63]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: [[TMP59:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP59]]) +// CHECK1-NEXT: [[TMP64:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP64]]) // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [5 x %struct.S], [5 x %struct.S]* [[C]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i64 5 +// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i64 5 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP60]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP65]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SD1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done8: -// CHECK1-NEXT: [[TMP61:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP61]] +// CHECK1-NEXT: [[TMP66:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP66]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SC1Ev @@ -468,68 +479,66 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i64 [[VLA:%.*]], i16* nonnull align 2 dereferenceable(2) [[D:%.*]], i8** nonnull align 8 dereferenceable(8) [[DOTTASK_RED_:%.*]], i8** nonnull align 8 dereferenceable(8) [[DOTTASK_RED_1:%.*]]) #[[ATTR8:[0-9]+]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR8:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i16*, align 8 -// CHECK1-NEXT: [[DOTTASK_RED__ADDR:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: [[DOTTASK_RED__ADDR2:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i16* [[D]], i16** [[D_ADDR]], align 8 -// CHECK1-NEXT: store i8** [[DOTTASK_RED_]], i8*** [[DOTTASK_RED__ADDR]], align 8 -// CHECK1-NEXT: store i8** [[DOTTASK_RED_1]], i8*** [[DOTTASK_RED__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16*, i16** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8**, i8*** [[DOTTASK_RED__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[DOTTASK_RED__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store i32* [[TMP0]], i32** [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: store i16* [[TMP2]], i16** [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK1-NEXT: store i8** [[TMP3]], i8*** [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 4 -// CHECK1-NEXT: store i8** [[TMP4]], i8*** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) -// CHECK1-NEXT: [[TMP12:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 1, i64 96, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to %struct.kmp_task_t_with_privates* -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP13]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP14]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP16]], i8* align 8 [[TMP17]], i64 40, i1 false) -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP13]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP18]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP20:%.*]] = load i8*, i8** [[TMP3]], align 8 -// CHECK1-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP18]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP4]], align 8 -// CHECK1-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP14]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, i64* [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP14]], i32 0, i32 6 -// CHECK1-NEXT: store i64 4, i64* [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP14]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, i64* [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP14]], i32 0, i32 9 -// CHECK1-NEXT: [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i8* -// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP27]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP28:%.*]] = load i64, i64* [[TMP25]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i8* [[TMP12]], i32 1, i64* [[TMP23]], i64* [[TMP24]], i64 [[TMP28]], i32 1, i32 0, i64 0, i8* null) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16*, i16** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8**, i8*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i8**, i8*** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[TMP2]], i32** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: store i16* [[TMP6]], i16** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 3 +// CHECK1-NEXT: store i8** [[TMP8]], i8*** [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 4 +// CHECK1-NEXT: store i8** [[TMP10]], i8*** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP18:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 1, i64 96, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP18]] to %struct.kmp_task_t_with_privates* +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP19]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast %struct.anon.1* [[AGG_CAPTURED]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP22]], i8* align 8 [[TMP23]], i64 40, i1 false) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP19]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP24]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP26:%.*]] = load i8*, i8** [[TMP8]], align 8 +// CHECK1-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP24]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i8*, i8** [[TMP10]], align 8 +// CHECK1-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, i64* [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 6 +// CHECK1-NEXT: store i64 4, i64* [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, i64* [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 9 +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i8* +// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP33]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP34:%.*]] = load i64, i64* [[TMP31]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i8* [[TMP18]], i32 1, i64* [[TMP29]], i64* [[TMP30]], i64 [[TMP34]], i32 1, i32 0, i64 0, i8* null) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) // CHECK1-NEXT: ret void // // @@ -565,7 +574,7 @@ // CHECK1-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[I_I:%.*]] = alloca i32, align 4 @@ -580,7 +589,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.1* // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -609,9 +618,9 @@ // CHECK1-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // CHECK1-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 1 +// CHECK1-NEXT: store %struct.anon.1* [[TMP8]], %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP22]], i32 0, i32 1 // CHECK1-NEXT: [[TMP24:%.*]] = load i64, i64* [[TMP23]], align 8 // CHECK1-NEXT: [[TMP25:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP26:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 @@ -619,14 +628,14 @@ // CHECK1-NEXT: call void [[TMP27]](i8* [[TMP26]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]]) #[[ATTR3]] // CHECK1-NEXT: [[TMP28:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP29:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP22]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[TMP30]], align 8 // CHECK1-NEXT: [[TMP32:%.*]] = load i8*, i8** [[TMP28]], align 8 // CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 // CHECK1-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP31]] to i8* // CHECK1-NEXT: [[TMP35:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP33]], i8* [[TMP32]], i8* [[TMP34]]) #[[ATTR3]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP35]] to i32* -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP22]], i32 0, i32 2 // CHECK1-NEXT: [[TMP37:%.*]] = load i16*, i16** [[TMP36]], align 8 // CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP24]], 2 // CHECK1-NEXT: [[TMP39:%.*]] = udiv exact i64 [[TMP38]], ptrtoint (i16* getelementptr (i16, i16* null, i32 1) to i64) diff --git a/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp b/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp --- a/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp +++ b/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp @@ -55,6 +55,7 @@ // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_3:%.*]] = alloca [2 x %struct.kmp_taskred_input_t.0], align 8 // CHECK1-NEXT: [[DOTTASK_RED_6:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 @@ -174,24 +175,34 @@ // CHECK1-NEXT: [[TMP57:%.*]] = bitcast [2 x %struct.kmp_taskred_input_t.0]* [[DOTRD_INPUT_3]] to i8* // CHECK1-NEXT: [[TMP58:%.*]] = call i8* @__kmpc_taskred_init(i32 [[TMP0]], i32 2, i8* [[TMP57]]) // CHECK1-NEXT: store i8* [[TMP58]], i8** [[DOTTASK_RED_6]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i16*, i8**, i8**)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[A]], i64 [[TMP2]], i16* [[VLA]], i8** [[DOTTASK_RED_]], i8** [[DOTTASK_RED_6]]) +// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[A]], i32** [[TMP59]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[TMP60]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i16* [[VLA]], i16** [[TMP61]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i8** [[DOTTASK_RED_]], i8*** [[TMP62]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store i8** [[DOTTASK_RED_6]], i8*** [[TMP63]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: [[TMP59:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP59]]) +// CHECK1-NEXT: [[TMP64:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP64]]) // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [5 x %struct.S], [5 x %struct.S]* [[C]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i64 5 +// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i64 5 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP60]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP65]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SD1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done8: -// CHECK1-NEXT: [[TMP61:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP61]] +// CHECK1-NEXT: [[TMP66:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP66]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SC1Ev @@ -468,68 +479,66 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i64 [[VLA:%.*]], i16* nonnull align 2 dereferenceable(2) [[D:%.*]], i8** nonnull align 8 dereferenceable(8) [[DOTTASK_RED_:%.*]], i8** nonnull align 8 dereferenceable(8) [[DOTTASK_RED_1:%.*]]) #[[ATTR8:[0-9]+]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR8:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i16*, align 8 -// CHECK1-NEXT: [[DOTTASK_RED__ADDR:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: [[DOTTASK_RED__ADDR2:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i16* [[D]], i16** [[D_ADDR]], align 8 -// CHECK1-NEXT: store i8** [[DOTTASK_RED_]], i8*** [[DOTTASK_RED__ADDR]], align 8 -// CHECK1-NEXT: store i8** [[DOTTASK_RED_1]], i8*** [[DOTTASK_RED__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16*, i16** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8**, i8*** [[DOTTASK_RED__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[DOTTASK_RED__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store i32* [[TMP0]], i32** [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: store i16* [[TMP2]], i16** [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK1-NEXT: store i8** [[TMP3]], i8*** [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 4 -// CHECK1-NEXT: store i8** [[TMP4]], i8*** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) -// CHECK1-NEXT: [[TMP12:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 1, i64 96, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to %struct.kmp_task_t_with_privates* -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP13]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP14]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP16]], i8* align 8 [[TMP17]], i64 40, i1 false) -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP13]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP18]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP20:%.*]] = load i8*, i8** [[TMP3]], align 8 -// CHECK1-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP18]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP4]], align 8 -// CHECK1-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP14]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, i64* [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP14]], i32 0, i32 6 -// CHECK1-NEXT: store i64 4, i64* [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP14]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, i64* [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP14]], i32 0, i32 9 -// CHECK1-NEXT: [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i8* -// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP27]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP28:%.*]] = load i64, i64* [[TMP25]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i8* [[TMP12]], i32 1, i64* [[TMP23]], i64* [[TMP24]], i64 [[TMP28]], i32 1, i32 0, i64 0, i8* null) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16*, i16** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8**, i8*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i8**, i8*** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[TMP2]], i32** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: store i16* [[TMP6]], i16** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 3 +// CHECK1-NEXT: store i8** [[TMP8]], i8*** [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 4 +// CHECK1-NEXT: store i8** [[TMP10]], i8*** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP18:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i32 1, i64 96, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP18]] to %struct.kmp_task_t_with_privates* +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP19]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast %struct.anon.1* [[AGG_CAPTURED]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP22]], i8* align 8 [[TMP23]], i64 40, i1 false) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP19]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP24]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP26:%.*]] = load i8*, i8** [[TMP8]], align 8 +// CHECK1-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP24]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i8*, i8** [[TMP10]], align 8 +// CHECK1-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, i64* [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 6 +// CHECK1-NEXT: store i64 4, i64* [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, i64* [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP20]], i32 0, i32 9 +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i8* +// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP33]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP34:%.*]] = load i64, i64* [[TMP31]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], i8* [[TMP18]], i32 1, i64* [[TMP29]], i64* [[TMP30]], i64 [[TMP34]], i32 1, i32 0, i64 0, i8* null) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) // CHECK1-NEXT: ret void // // @@ -565,7 +574,7 @@ // CHECK1-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[I_I:%.*]] = alloca i32, align 4 @@ -580,7 +589,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.1* // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -609,9 +618,9 @@ // CHECK1-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // CHECK1-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 1 +// CHECK1-NEXT: store %struct.anon.1* [[TMP8]], %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP22]], i32 0, i32 1 // CHECK1-NEXT: [[TMP24:%.*]] = load i64, i64* [[TMP23]], align 8 // CHECK1-NEXT: [[TMP25:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP26:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 @@ -619,14 +628,14 @@ // CHECK1-NEXT: call void [[TMP27]](i8* [[TMP26]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]]) #[[ATTR3]] // CHECK1-NEXT: [[TMP28:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP29:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP22]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[TMP30]], align 8 // CHECK1-NEXT: [[TMP32:%.*]] = load i8*, i8** [[TMP28]], align 8 // CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 // CHECK1-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP31]] to i8* // CHECK1-NEXT: [[TMP35:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP33]], i8* [[TMP32]], i8* [[TMP34]]) #[[ATTR3]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP35]] to i32* -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP22]], i32 0, i32 2 // CHECK1-NEXT: [[TMP37:%.*]] = load i16*, i16** [[TMP36]], align 8 // CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP24]], 2 // CHECK1-NEXT: [[TMP39:%.*]] = udiv exact i64 [[TMP38]], ptrtoint (i16* getelementptr (i16, i16* null, i32 1) to i64) diff --git a/clang/test/OpenMP/teams_codegen.cpp b/clang/test/OpenMP/teams_codegen.cpp --- a/clang/test/OpenMP/teams_codegen.cpp +++ b/clang/test/OpenMP/teams_codegen.cpp @@ -580,25 +580,30 @@ // CHECK1-SAME: (i64 noundef [[COMP:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[COMP]], i64* [[COMP_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[COMP_ADDR]] to i32* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[COMP]], i32** [[COMP_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[COMP_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -606,25 +611,30 @@ // CHECK1-SAME: (i64 noundef [[COMP:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i64 [[COMP]], i64* [[COMP_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[COMP_ADDR]] to i32* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[COMP]], i32** [[COMP_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[COMP_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -633,6 +643,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[LA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[LA]], i64* [[LA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[COMP]], i64* [[COMP_ADDR]], align 8 @@ -640,23 +651,27 @@ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[COMP_ADDR]] to i32* // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* [[CONV1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV1]], i32** [[TMP2]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[COMP]], i32** [[COMP_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[COMP_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -665,6 +680,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[LA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[LA]], i64* [[LA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[COMP]], i64* [[COMP_ADDR]], align 8 @@ -672,23 +688,27 @@ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[COMP_ADDR]] to i32* // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* [[CONV1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV1]], i32** [[TMP2]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[COMP]], i32** [[COMP_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[COMP_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -700,6 +720,7 @@ // CHECK1-NEXT: [[GBLB_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[LC_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[GBLA]], i64* [[GBLA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 @@ -719,23 +740,27 @@ // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i64 [[TMP3]], [[CONV4]] // CHECK1-NEXT: [[TMP5:%.*]] = trunc i64 [[ADD5]] to i32 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[ADD]], i32 [[TMP5]]) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* [[CONV3]]) +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV3]], i32** [[TMP6]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[COMP]], i32** [[COMP_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[COMP_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -745,6 +770,7 @@ // CHECK1-NEXT: [[GBLC_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[GBLC]], i64* [[GBLC_ADDR]], align 8 // CHECK1-NEXT: store i64 [[COMP]], i64* [[COMP_ADDR]], align 8 @@ -756,30 +782,35 @@ // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* @Gbla, align 4 // CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP2]], 2 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[ADD]], i32 [[ADD2]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32* [[CONV1]], i32* [[TMP3]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV1]], i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[TMP4]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[GBLC:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[GBLC_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[COMP]], i32** [[COMP_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[GBLC]], i32** [[GBLC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[COMP_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[GBLC_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[TMP1]], i32** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* @Gbla, align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP2]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK1-NEXT: store i32* [[TMP4]], i32** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* @Gbla, align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP5]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -1057,24 +1088,29 @@ // CHECK3-SAME: (i32 noundef [[COMP:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[COMP]], i32* [[COMP_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[COMP_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[COMP_ADDR]], i32** [[TMP0]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[COMP]], i32** [[COMP_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[COMP_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK3-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK3-NEXT: store i32 [[INC]], i32* [[TMP2]], align 4 // CHECK3-NEXT: ret void // // @@ -1082,24 +1118,29 @@ // CHECK3-SAME: (i32 noundef [[COMP:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32 [[COMP]], i32* [[COMP_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[COMP_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[COMP_ADDR]], i32** [[TMP0]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[COMP]], i32** [[COMP_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[COMP_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK3-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK3-NEXT: store i32 [[INC]], i32* [[TMP2]], align 4 // CHECK3-NEXT: ret void // // @@ -1108,28 +1149,33 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[LA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[LA]], i32* [[LA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[COMP]], i32* [[COMP_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[LA_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* [[COMP_ADDR]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[COMP_ADDR]], i32** [[TMP2]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[COMP]], i32** [[COMP_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[COMP_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK3-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK3-NEXT: store i32 [[INC]], i32* [[TMP2]], align 4 // CHECK3-NEXT: ret void // // @@ -1138,28 +1184,33 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[LA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[LA]], i32* [[LA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[COMP]], i32* [[COMP_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[LA_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* [[COMP_ADDR]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[COMP_ADDR]], i32** [[TMP2]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[COMP]], i32** [[COMP_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[COMP_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK3-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK3-NEXT: store i32 [[INC]], i32* [[TMP2]], align 4 // CHECK3-NEXT: ret void // // @@ -1172,6 +1223,7 @@ // CHECK3-NEXT: [[LC_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[GBLB1:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[GBLA]], i32* [[GBLA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 @@ -1191,23 +1243,27 @@ // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i64 [[TMP5]], [[CONV2]] // CHECK3-NEXT: [[TMP7:%.*]] = trunc i64 [[ADD3]] to i32 // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[ADD]], i32 [[TMP7]]) -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* [[COMP_ADDR]]) +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[COMP_ADDR]], i32** [[TMP8]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[COMP]], i32** [[COMP_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[COMP_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK3-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK3-NEXT: store i32 [[INC]], i32* [[TMP2]], align 4 // CHECK3-NEXT: ret void // // @@ -1217,6 +1273,7 @@ // CHECK3-NEXT: [[GBLC_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[GBLC]], i32* [[GBLC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[COMP]], i32* [[COMP_ADDR]], align 4 @@ -1226,30 +1283,35 @@ // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* @Gbla, align 4 // CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP2]], 2 // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[ADD]], i32 [[ADD1]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32* [[COMP_ADDR]], i32* [[TMP3]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[COMP_ADDR]], i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP]], align 4 +// CHECK3-NEXT: store i32* [[TMP5]], i32** [[TMP4]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[GBLC:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[GBLC_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[COMP]], i32** [[COMP_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[GBLC]], i32** [[GBLC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[COMP_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[GBLC_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[TMP1]], i32** [[TMP]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* @Gbla, align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP2]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 4 +// CHECK3-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: store i32* [[TMP4]], i32** [[TMP]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* @Gbla, align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP5]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[TMP2]], align 4 // CHECK3-NEXT: ret void // // @@ -1370,6 +1432,7 @@ // CHECK9-NEXT: [[GBLA_ADDR:%.*]] = alloca %struct.SS*, align 8 // CHECK9-NEXT: [[LA_ADDR:%.*]] = alloca %struct.SS*, align 8 // CHECK9-NEXT: [[COMP_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: store %struct.SS* [[GBLA]], %struct.SS** [[GBLA_ADDR]], align 8 // CHECK9-NEXT: store %struct.SS* [[LA]], %struct.SS** [[LA_ADDR]], align 8 @@ -1384,23 +1447,27 @@ // CHECK9-NEXT: [[CONV1:%.*]] = fptosi float [[TMP4]] to i64 // CHECK9-NEXT: [[TMP5:%.*]] = trunc i64 [[CONV1]] to i32 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP3]], i32 [[TMP5]]) -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP6]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[COMP_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[COMP]], i32** [[COMP_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[COMP_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK9-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK9-NEXT: store i32 [[INC]], i32* [[TMP2]], align 4 // CHECK9-NEXT: ret void // // @@ -1410,6 +1477,7 @@ // CHECK9-NEXT: [[LB_ADDR:%.*]] = alloca %struct.SS.0*, align 8 // CHECK9-NEXT: [[GBLB_ADDR:%.*]] = alloca %struct.SS.0*, align 8 // CHECK9-NEXT: [[COMP_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: store %struct.SS.0* [[LB]], %struct.SS.0** [[LB_ADDR]], align 8 // CHECK9-NEXT: store %struct.SS.0* [[GBLB]], %struct.SS.0** [[GBLB_ADDR]], align 8 @@ -1425,23 +1493,27 @@ // CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[A]], align 8 // CHECK9-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP4]], i32 [[TMP6]]) -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP7]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[COMP_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[COMP]], i32** [[COMP_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[COMP_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK9-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK9-NEXT: store i32 [[INC]], i32* [[TMP2]], align 4 // CHECK9-NEXT: ret void // // @@ -1560,6 +1632,7 @@ // CHECK11-NEXT: [[GBLA_ADDR:%.*]] = alloca %struct.SS*, align 4 // CHECK11-NEXT: [[LA_ADDR:%.*]] = alloca %struct.SS*, align 4 // CHECK11-NEXT: [[COMP_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK11-NEXT: store %struct.SS* [[GBLA]], %struct.SS** [[GBLA_ADDR]], align 4 // CHECK11-NEXT: store %struct.SS* [[LA]], %struct.SS** [[LA_ADDR]], align 4 @@ -1573,23 +1646,27 @@ // CHECK11-NEXT: [[CONV:%.*]] = fptosi float [[TMP4]] to i64 // CHECK11-NEXT: [[TMP5:%.*]] = trunc i64 [[CONV]] to i32 // CHECK11-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP3]], i32 [[TMP5]]) -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[COMP_ADDR]]) +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[COMP_ADDR]], i32** [[TMP6]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[COMP_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[COMP]], i32** [[COMP_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[COMP_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK11-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK11-NEXT: store i32 [[INC]], i32* [[TMP2]], align 4 // CHECK11-NEXT: ret void // // @@ -1599,6 +1676,7 @@ // CHECK11-NEXT: [[LB_ADDR:%.*]] = alloca %struct.SS.0*, align 4 // CHECK11-NEXT: [[GBLB_ADDR:%.*]] = alloca %struct.SS.0*, align 4 // CHECK11-NEXT: [[COMP_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK11-NEXT: store %struct.SS.0* [[LB]], %struct.SS.0** [[LB_ADDR]], align 4 // CHECK11-NEXT: store %struct.SS.0* [[GBLB]], %struct.SS.0** [[GBLB_ADDR]], align 4 @@ -1613,23 +1691,27 @@ // CHECK11-NEXT: [[TMP5:%.*]] = load i64, i64* [[A]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 // CHECK11-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP4]], i32 [[TMP6]]) -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[COMP_ADDR]]) +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[COMP_ADDR]], i32** [[TMP7]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[COMP_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[COMP]], i32** [[COMP_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[COMP_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK11-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK11-NEXT: store i32 [[INC]], i32* [[TMP2]], align 4 // CHECK11-NEXT: ret void // // @@ -1739,6 +1821,7 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 // CHECK17-NEXT: [[COMP_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK17-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK17-NEXT: store i64 [[COMP]], i64* [[COMP_ADDR]], align 8 @@ -1747,23 +1830,27 @@ // CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP1]], i32 0, i32 0 // CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 // CHECK17-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 123) -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i32* [[CONV]], i32** [[TMP3]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[COMP_ADDR:%.*]] = alloca i32*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i32* [[COMP]], i32** [[COMP_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[COMP_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK17-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 +// CHECK17-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK17-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK17-NEXT: store i32 [[INC]], i32* [[TMP2]], align 4 // CHECK17-NEXT: ret void // // @@ -1772,6 +1859,7 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 // CHECK17-NEXT: [[COMP_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK17-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK17-NEXT: store i64 [[COMP]], i64* [[COMP_ADDR]], align 8 @@ -1782,23 +1870,27 @@ // CHECK17-NEXT: [[CONV1:%.*]] = fptosi float [[TMP2]] to i32 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 123 // CHECK17-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 456, i32 [[ADD]]) -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i32* [[CONV]], i32** [[TMP3]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[COMP_ADDR:%.*]] = alloca i32*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i32* [[COMP]], i32** [[COMP_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[COMP_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK17-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 +// CHECK17-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK17-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK17-NEXT: store i32 [[INC]], i32* [[TMP2]], align 4 // CHECK17-NEXT: ret void // // @@ -1906,6 +1998,7 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 // CHECK19-NEXT: [[COMP_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK19-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK19-NEXT: store i32 [[COMP]], i32* [[COMP_ADDR]], align 4 @@ -1913,23 +2006,27 @@ // CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP1]], i32 0, i32 0 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 // CHECK19-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 123) -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[COMP_ADDR]]) +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32* [[COMP_ADDR]], i32** [[TMP3]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[COMP_ADDR:%.*]] = alloca i32*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32* [[COMP]], i32** [[COMP_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32*, i32** [[COMP_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK19-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK19-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 +// CHECK19-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK19-NEXT: store i32 [[INC]], i32* [[TMP2]], align 4 // CHECK19-NEXT: ret void // // @@ -1938,6 +2035,7 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 // CHECK19-NEXT: [[COMP_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK19-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK19-NEXT: store i32 [[COMP]], i32* [[COMP_ADDR]], align 4 @@ -1947,23 +2045,27 @@ // CHECK19-NEXT: [[CONV:%.*]] = fptosi float [[TMP2]] to i32 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 123 // CHECK19-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 456, i32 [[ADD]]) -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[COMP_ADDR]]) +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32* [[COMP_ADDR]], i32** [[TMP3]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[COMP_ADDR:%.*]] = alloca i32*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32* [[COMP]], i32** [[COMP_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32*, i32** [[COMP_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK19-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK19-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 +// CHECK19-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK19-NEXT: store i32 [[INC]], i32* [[TMP2]], align 4 // CHECK19-NEXT: ret void // // @@ -1978,23 +2080,28 @@ // CHECK25-SAME: (i64 noundef [[ARGC:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK25-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* -// CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK25-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: ret void // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK25-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR0]] { +// CHECK25-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK25-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK25-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 -// CHECK25-NEXT: store i32 0, i32* [[TMP0]], align 4 +// CHECK25-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK25-NEXT: store i32 0, i32* [[TMP2]], align 4 // CHECK25-NEXT: ret void // // @@ -2002,22 +2109,27 @@ // CHECK25-SAME: (i8** noundef [[ARGC:%.*]]) #[[ATTR0]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 8 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK25-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8 -// CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]]) +// CHECK25-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store i8*** [[ARGC_ADDR]], i8**** [[TMP0]], align 8 +// CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: ret void // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK25-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8*** noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]]) #[[ATTR0]] { +// CHECK25-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK25-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK25-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 8 -// CHECK25-NEXT: store i8** null, i8*** [[TMP0]], align 8 +// CHECK25-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load i8***, i8**** [[TMP1]], align 8 +// CHECK25-NEXT: store i8** null, i8*** [[TMP2]], align 8 // CHECK25-NEXT: ret void // // @@ -2025,22 +2137,27 @@ // CHECK27-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK27-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 -// CHECK27-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]]) +// CHECK27-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store i32* [[ARGC_ADDR]], i32** [[TMP0]], align 4 +// CHECK27-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK27-NEXT: ret void // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK27-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR0]] { +// CHECK27-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK27-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK27-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 -// CHECK27-NEXT: store i32 0, i32* [[TMP0]], align 4 +// CHECK27-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK27-NEXT: store i32 0, i32* [[TMP2]], align 4 // CHECK27-NEXT: ret void // // @@ -2048,22 +2165,27 @@ // CHECK27-SAME: (i8** noundef [[ARGC:%.*]]) #[[ATTR0]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK27-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 4 -// CHECK27-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]]) +// CHECK27-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store i8*** [[ARGC_ADDR]], i8**** [[TMP0]], align 4 +// CHECK27-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK27-NEXT: ret void // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK27-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8*** noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR0]] { +// CHECK27-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK27-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK27-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 4 -// CHECK27-NEXT: store i8** null, i8*** [[TMP0]], align 4 +// CHECK27-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load i8***, i8**** [[TMP1]], align 4 +// CHECK27-NEXT: store i8** null, i8*** [[TMP2]], align 4 // CHECK27-NEXT: ret void // // @@ -2073,6 +2195,7 @@ // CHECK33-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK33-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK33-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 +// CHECK33-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK33-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK33-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK33-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -2083,21 +2206,25 @@ // CHECK33-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK33-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK33-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK33-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV2]]) +// CHECK33-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK33-NEXT: store i32* [[CONV2]], i32** [[TMP3]], align 8 +// CHECK33-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK33-NEXT: ret void // // // CHECK33-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK33-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR0]] { +// CHECK33-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK33-NEXT: entry: // CHECK33-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK33-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK33-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 +// CHECK33-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK33-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK33-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK33-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 -// CHECK33-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 -// CHECK33-NEXT: store i32 0, i32* [[TMP0]], align 4 +// CHECK33-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK33-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK33-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK33-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK33-NEXT: store i32 0, i32* [[TMP2]], align 4 // CHECK33-NEXT: ret void // // @@ -2107,6 +2234,7 @@ // CHECK33-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK33-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK33-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 8 +// CHECK33-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK33-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK33-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK33-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -2116,21 +2244,25 @@ // CHECK33-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK33-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK33-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK33-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]]) +// CHECK33-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK33-NEXT: store i8*** [[ARGC_ADDR]], i8**** [[TMP3]], align 8 +// CHECK33-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK33-NEXT: ret void // // // CHECK33-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK33-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8*** noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]]) #[[ATTR0]] { +// CHECK33-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK33-NEXT: entry: // CHECK33-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK33-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK33-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 8 +// CHECK33-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK33-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK33-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK33-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 8 -// CHECK33-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 8 -// CHECK33-NEXT: store i8** null, i8*** [[TMP0]], align 8 +// CHECK33-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK33-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK33-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK33-NEXT: [[TMP2:%.*]] = load i8***, i8**** [[TMP1]], align 8 +// CHECK33-NEXT: store i8** null, i8*** [[TMP2]], align 8 // CHECK33-NEXT: ret void // // @@ -2140,6 +2272,7 @@ // CHECK35-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK35-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK35-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK35-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK35-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -2147,21 +2280,25 @@ // CHECK35-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK35-NEXT: [[TMP2:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK35-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK35-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]]) +// CHECK35-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK35-NEXT: store i32* [[ARGC_ADDR]], i32** [[TMP3]], align 4 +// CHECK35-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK35-NEXT: ret void // // // CHECK35-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK35-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR0]] { +// CHECK35-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK35-NEXT: entry: // CHECK35-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK35-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK35-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 4 +// CHECK35-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK35-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK35-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK35-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 4 -// CHECK35-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 -// CHECK35-NEXT: store i32 0, i32* [[TMP0]], align 4 +// CHECK35-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK35-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK35-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK35-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK35-NEXT: store i32 0, i32* [[TMP2]], align 4 // CHECK35-NEXT: ret void // // @@ -2171,6 +2308,7 @@ // CHECK35-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 4 +// CHECK35-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK35-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK35-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK35-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -2178,54 +2316,66 @@ // CHECK35-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK35-NEXT: [[TMP2:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK35-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK35-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]]) +// CHECK35-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK35-NEXT: store i8*** [[ARGC_ADDR]], i8**** [[TMP3]], align 4 +// CHECK35-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK35-NEXT: ret void // // // CHECK35-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK35-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8*** noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR0]] { +// CHECK35-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK35-NEXT: entry: // CHECK35-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK35-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK35-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 4 +// CHECK35-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK35-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK35-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK35-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 4 -// CHECK35-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 4 -// CHECK35-NEXT: store i8** null, i8*** [[TMP0]], align 4 +// CHECK35-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK35-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK35-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK35-NEXT: [[TMP2:%.*]] = load i8***, i8**** [[TMP1]], align 4 +// CHECK35-NEXT: store i8** null, i8*** [[TMP2]], align 4 // CHECK35-NEXT: ret void // // // CHECK41-LABEL: define {{[^@]+}}@_Z3foov // CHECK41-SAME: () #[[ATTR0:[0-9]+]] { // CHECK41-NEXT: entry: -// CHECK41-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK41-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK41-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK41-NEXT: ret void // // // CHECK41-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK41-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK41-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK41-NEXT: entry: // CHECK41-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK41-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK41-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK41-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK41-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK41-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK41-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK41-NEXT: ret void // // // CHECK43-LABEL: define {{[^@]+}}@_Z3foov // CHECK43-SAME: () #[[ATTR0:[0-9]+]] { // CHECK43-NEXT: entry: -// CHECK43-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK43-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK43-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK43-NEXT: ret void // // // CHECK43-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK43-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK43-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK43-NEXT: entry: // CHECK43-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK43-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK43-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK43-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK43-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK43-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK43-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK43-NEXT: ret void // diff --git a/clang/test/OpenMP/teams_distribute_codegen.cpp b/clang/test/OpenMP/teams_distribute_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_codegen.cpp @@ -319,6 +319,7 @@ // CHECK1-NEXT: [[TH_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) // CHECK1-NEXT: store i64 [[TE]], i64* [[TE_ADDR]], align 8 // CHECK1-NEXT: store i64 [[TH]], i64* [[TH_ADDR]], align 8 @@ -331,17 +332,20 @@ // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [100 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV2]], [100 x i32]* [[TMP1]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV2]], i32** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store [100 x i32]* [[TMP1]], [100 x i32]** [[TMP5]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], [100 x i32]* noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -354,74 +358,76 @@ // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store [100 x i32]* [[A]], [100 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP1]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP4]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -432,21 +438,25 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: store [100 x i32]* [[A]], [100 x i32]** [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [100 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[CONV]], [100 x i32]* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store [100 x i32]* [[TMP0]], [100 x i32]** [[TMP2]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], [100 x i32]* noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -459,74 +469,76 @@ // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store [100 x i32]* [[A]], [100 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP1]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP4]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -679,6 +691,7 @@ // CHECK3-NEXT: [[TH_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) // CHECK3-NEXT: store i32 [[TE]], i32* [[TE_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TH]], i32* [[TH_ADDR]], align 4 @@ -688,17 +701,20 @@ // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TE_ADDR]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TH_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [100 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[N_ADDR]], [100 x i32]* [[TMP1]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[N_ADDR]], i32** [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store [100 x i32]* [[TMP1]], [100 x i32]** [[TMP5]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], [100 x i32]* noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -711,73 +727,75 @@ // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store [100 x i32]* [[A]], [100 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP1]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP4]], i32 0, i32 [[TMP19]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -788,20 +806,24 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store [100 x i32]* [[A]], [100 x i32]** [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [100 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], [100 x i32]* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[N_ADDR]], i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store [100 x i32]* [[TMP0]], [100 x i32]** [[TMP2]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], [100 x i32]* noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -814,73 +836,75 @@ // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store [100 x i32]* [[A]], [100 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP1]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP4]], i32 0, i32 [[TMP19]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -981,24 +1005,29 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1011,76 +1040,78 @@ // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) +// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1180,23 +1211,28 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1209,75 +1245,77 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP17]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 [[TMP21]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) +// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -1337,18 +1375,21 @@ // CHECK17-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1358,56 +1399,58 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK17-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK17-NEXT: ret void // // @@ -1465,18 +1508,21 @@ // CHECK19-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1486,55 +1532,57 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP9]] +// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK19-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK19-NEXT: ret void // // @@ -1641,24 +1689,29 @@ // CHECK25-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK25-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK25-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK25-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK25-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK25-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK25-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store i32* [[CONV]], i32** [[TMP2]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK25-NEXT: store i64 [[TMP0]], i64* [[TMP3]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK25-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 8 +// CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: ret void // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK25-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK25-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK25-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK25-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1671,76 +1724,78 @@ // CHECK25-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK25-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK25-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK25-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK25-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK25-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK25-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK25-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK25-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK25-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK25-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK25-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK25-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK25-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK25-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK25-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK25-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK25: omp.precond.then: // CHECK25-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK25-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK25-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK25-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK25-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK25-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK25: cond.true: -// CHECK25-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK25-NEXT: br label [[COND_END:%.*]] // CHECK25: cond.false: -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK25-NEXT: br label [[COND_END]] // CHECK25: cond.end: -// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK25-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK25-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK25: omp.inner.for.cond: -// CHECK25-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK25-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK25-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK25-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK25-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK25-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK25-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i64 [[IDXPROM]] // CHECK25-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK25: omp.body.continue: // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK25-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK25-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: -// CHECK25-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) +// CHECK25-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) // CHECK25-NEXT: br label [[OMP_PRECOND_END]] // CHECK25: omp.precond.end: // CHECK25-NEXT: ret void @@ -1815,6 +1870,7 @@ // CHECK25-NEXT: [[TE_ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[TH_ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) // CHECK25-NEXT: store i64 [[TE]], i64* [[TE_ADDR]], align 8 // CHECK25-NEXT: store i64 [[TH]], i64* [[TH_ADDR]], align 8 @@ -1825,16 +1881,18 @@ // CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP1]]) +// CHECK25-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store [10 x i32]* [[TMP1]], [10 x i32]** [[TMP4]], align 8 +// CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: ret void // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK25-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK25-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1844,55 +1902,57 @@ // CHECK25-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK25-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK25-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK25-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK25-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK25-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK25-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK25-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK25-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK25: cond.true: // CHECK25-NEXT: br label [[COND_END:%.*]] // CHECK25: cond.false: -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK25-NEXT: br label [[COND_END]] // CHECK25: cond.end: -// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK25-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK25-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK25: omp.inner.for.cond: -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK25-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK25-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK25-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK25-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK25-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK25: omp.body.continue: // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK25-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: -// CHECK25-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK25-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK25-NEXT: ret void // // @@ -1998,23 +2058,28 @@ // CHECK27-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK27-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK27-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK27-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK27-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK27-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK27-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK27-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store i32* [[N_ADDR]], i32** [[TMP2]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK27-NEXT: store i32 [[TMP0]], i32* [[TMP3]], align 4 +// CHECK27-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK27-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 4 +// CHECK27-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK27-NEXT: ret void // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK27-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK27-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK27-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK27-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2027,75 +2092,77 @@ // CHECK27-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK27-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK27-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK27-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK27-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK27-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK27-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK27-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK27-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK27-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK27-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK27-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK27-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK27-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK27: omp.precond.then: // CHECK27-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK27-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK27-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK27-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK27-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK27-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK27-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK27: cond.true: -// CHECK27-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK27-NEXT: br label [[COND_END:%.*]] // CHECK27: cond.false: -// CHECK27-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK27-NEXT: br label [[COND_END]] // CHECK27: cond.end: -// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK27-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK27: omp.inner.for.cond: -// CHECK27-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK27-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK27-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK27-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK27-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK27-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP17]] +// CHECK27-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 [[TMP21]] // CHECK27-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK27: omp.body.continue: // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK27-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK27-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: -// CHECK27-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) +// CHECK27-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) // CHECK27-NEXT: br label [[OMP_PRECOND_END]] // CHECK27: omp.precond.end: // CHECK27-NEXT: ret void @@ -2168,6 +2235,7 @@ // CHECK27-NEXT: [[TE_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TH_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK27-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) // CHECK27-NEXT: store i32 [[TE]], i32* [[TE_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TH]], i32* [[TH_ADDR]], align 4 @@ -2176,16 +2244,18 @@ // CHECK27-NEXT: [[TMP2:%.*]] = load i32, i32* [[TE_ADDR]], align 4 // CHECK27-NEXT: [[TMP3:%.*]] = load i32, i32* [[TH_ADDR]], align 4 // CHECK27-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK27-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP1]]) +// CHECK27-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store [10 x i32]* [[TMP1]], [10 x i32]** [[TMP4]], align 4 +// CHECK27-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK27-NEXT: ret void // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK27-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK27-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2195,54 +2265,56 @@ // CHECK27-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK27-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK27-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK27-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK27-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK27-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK27-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK27-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK27-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK27-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK27-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK27: cond.true: // CHECK27-NEXT: br label [[COND_END:%.*]] // CHECK27: cond.false: -// CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK27-NEXT: br label [[COND_END]] // CHECK27: cond.end: -// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK27-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK27: omp.inner.for.cond: -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK27-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK27-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK27-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK27-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK27-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i32 0, i32 [[TMP11]] // CHECK27-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK27: omp.body.continue: // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK27-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK27-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: -// CHECK27-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK27-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK27-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_collapse_codegen.cpp b/clang/test/OpenMP/teams_distribute_collapse_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_collapse_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_collapse_codegen.cpp @@ -146,18 +146,21 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -169,68 +172,70 @@ // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 56087, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 456 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 // CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK1-NEXT: store i32 [[ADD6]], i32* [[J]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], [123 x [456 x i32]]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [456 x i32], [456 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX8]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -290,18 +295,21 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -313,66 +321,68 @@ // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 56087, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 456 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK3-NEXT: store i32 [[ADD6]], i32* [[J]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], [123 x [456 x i32]]* [[A]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], [456 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], [123 x [456 x i32]]* [[A]], i32 0, i32 [[TMP13]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4 +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], [456 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP14]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX7]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -519,6 +529,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[M]], i64* [[M_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -529,153 +540,161 @@ // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i64, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]], i32* [[CONV3]], i64 [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32* [[CONV3]], i32** [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP1]], i64* [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32* [[TMP2]], i32** [[TMP7]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[M]], i32** [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[M_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 // CHECK9-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: store i64 [[TMP17]], i64* [[DOTOMP_UB]], align 8 // CHECK9-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP18]], i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP24]], i64* [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP22]], 0 -// CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK9-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] -// CHECK9-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 -// CHECK9-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP21]], [[CONV18]] -// CHECK9-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] -// CHECK9-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK9-NEXT: store i32 [[CONV21]], i32* [[I11]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK9-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 -// CHECK9-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] -// CHECK9-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 -// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP24]], [[CONV25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP26]], 0 -// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 -// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] -// CHECK9-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 -// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] -// CHECK9-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP23]], [[MUL31]] -// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 -// CHECK9-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] -// CHECK9-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 -// CHECK9-NEXT: store i32 [[CONV35]], i32* [[J12]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I11]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[TMP28:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP3]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i64 [[TMP28]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[J12]], align 4 -// CHECK9-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK9-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i64 [[IDXPROM36]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX37]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK9-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK9-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP27]], [[CONV16]] +// CHECK9-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK9-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK9-NEXT: store i32 [[CONV19]], i32* [[I9]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK9-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK9-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP30]], [[CONV23]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK9-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] +// CHECK9-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK9-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP29]], [[MUL29]] +// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 +// CHECK9-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] +// CHECK9-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK9-NEXT: store i32 [[CONV33]], i32* [[J10]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[I9]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[TMP34:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP8]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[J10]], align 4 +// CHECK9-NEXT: [[IDXPROM34:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK9-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i64 [[IDXPROM34]] +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX35]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP30]], 1 -// CHECK9-NEXT: store i64 [[ADD38]], i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 [[TMP36]], 1 +// CHECK9-NEXT: store i64 [[ADD36]], i64* [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -717,18 +736,21 @@ // CHECK9-SAME: ([10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x [2 x i32]]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [10 x [2 x i32]]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [10 x [2 x i32]]* [[TMP0]], [10 x [2 x i32]]** [[TMP1]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -740,67 +762,69 @@ // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 19, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 2 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 // CHECK9-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK9-NEXT: store i32 [[ADD6]], i32* [[J]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP2]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP14]] to i64 // CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX8]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // @@ -944,6 +968,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -952,151 +977,159 @@ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32, i32, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32* [[M_ADDR]], i32 [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[M_ADDR]], i32** [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], i32* [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32* [[TMP2]], i32** [[TMP7]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[M]], i32** [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[M_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 // CHECK11-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP17]], i64* [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP18]], i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP24]], i64* [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP22]], 0 -// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK11-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] -// CHECK11-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 -// CHECK11-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP21]], [[CONV18]] -// CHECK11-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] -// CHECK11-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK11-NEXT: store i32 [[CONV21]], i32* [[I11]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK11-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 -// CHECK11-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] -// CHECK11-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 -// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP24]], [[CONV25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP26]], 0 -// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 -// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] -// CHECK11-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 -// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] -// CHECK11-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP23]], [[MUL31]] -// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 -// CHECK11-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] -// CHECK11-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 -// CHECK11-NEXT: store i32 [[CONV35]], i32* [[J12]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I11]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = mul nsw i32 [[TMP27]], [[TMP3]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 [[TMP28]] -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[J12]], align 4 -// CHECK11-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i32 [[TMP29]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX36]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK11-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK11-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK11-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK11-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP27]], [[CONV16]] +// CHECK11-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK11-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK11-NEXT: store i32 [[CONV19]], i32* [[I9]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK11-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK11-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK11-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK11-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP30]], [[CONV23]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK11-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] +// CHECK11-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK11-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP29]], [[MUL29]] +// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 +// CHECK11-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] +// CHECK11-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK11-NEXT: store i32 [[CONV33]], i32* [[J10]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[I9]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = mul nsw i32 [[TMP33]], [[TMP8]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[J10]], align 4 +// CHECK11-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i32 [[TMP35]] +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX34]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[ADD37:%.*]] = add nsw i64 [[TMP30]], 1 -// CHECK11-NEXT: store i64 [[ADD37]], i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[ADD35:%.*]] = add nsw i64 [[TMP36]], 1 +// CHECK11-NEXT: store i64 [[ADD35]], i64* [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK11-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -1138,18 +1171,21 @@ // CHECK11-SAME: ([10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x [2 x i32]]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [10 x [2 x i32]]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [10 x [2 x i32]]* [[TMP0]], [10 x [2 x i32]]** [[TMP1]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1161,65 +1197,67 @@ // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 19, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 2 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 // CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK11-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK11-NEXT: store i32 [[ADD6]], i32* [[J]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP12]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP2]], i32 0, i32 [[TMP13]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4 +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP14]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX7]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp @@ -224,18 +224,21 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -245,56 +248,58 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -302,18 +307,21 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -323,56 +331,58 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -380,18 +390,21 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -401,73 +414,75 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -571,18 +586,21 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -592,55 +610,57 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP9]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -648,18 +668,21 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -669,55 +692,57 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP9]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -725,18 +750,21 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -746,72 +774,74 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP13]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -1042,24 +1072,29 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1072,76 +1107,78 @@ // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) +// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1153,24 +1190,29 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1183,76 +1225,78 @@ // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) +// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1265,7 +1309,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 @@ -1274,23 +1318,26 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -1303,96 +1350,99 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) +// CHECK9-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1477,18 +1527,21 @@ // CHECK9-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1498,55 +1551,57 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK9-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // @@ -1554,18 +1609,21 @@ // CHECK9-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1575,55 +1633,57 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK9-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // @@ -1631,18 +1691,21 @@ // CHECK9-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1652,72 +1715,74 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 10) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 10) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // @@ -1947,23 +2012,28 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1976,75 +2046,77 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP17]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 [[TMP21]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) +// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -2056,23 +2128,28 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2085,75 +2162,77 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP17]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 [[TMP21]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) +// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -2166,7 +2245,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 @@ -2174,22 +2253,26 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2202,94 +2285,98 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP20]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 [[TMP26]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -2374,18 +2461,21 @@ // CHECK11-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2395,54 +2485,56 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i32 0, i32 [[TMP11]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // @@ -2450,18 +2542,21 @@ // CHECK11-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2471,54 +2566,56 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i32 0, i32 [[TMP11]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // @@ -2526,18 +2623,21 @@ // CHECK11-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2547,71 +2647,73 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 10) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 10) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i32 0, i32 [[TMP13]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp @@ -336,8 +336,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 @@ -348,147 +347,153 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i64 [[TMP4]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP2]], i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S]* [[TMP6]] to %struct.S* +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP8]], %struct.St* noundef [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i64 0, i64 [[IDXPROM8]] -// CHECK1-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX9]] to i8* -// CHECK1-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR5]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP23]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast %struct.S* [[ARRAYIDX5]] to i8* +// CHECK1-NEXT: [[TMP27:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP28]] +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN12]], i64 2 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP25]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP33]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done13: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done9: // CHECK1-NEXT: ret void // // @@ -689,7 +694,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 @@ -699,24 +704,27 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK1-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i64 [[TMP4]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP5]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x %struct.S.0]* [[TMP1]], [2 x %struct.S.0]** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP7]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -724,118 +732,121 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca %struct.S.0*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP7]], align 8 +// CHECK1-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP9]], i8* align 4 [[TMP10]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [2 x %struct.S.0]* [[TMP6]] to %struct.S.0* +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP12]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP11]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.St* noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: [[TMP13:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP13]], %struct.St* noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP16]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i64 0, i64 [[IDXPROM9]] -// CHECK1-NEXT: [[TMP20:%.*]] = bitcast %struct.S.0* [[ARRAYIDX10]] to i8* -// CHECK1-NEXT: [[TMP21:%.*]] = bitcast %struct.S.0* [[TMP18]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast %struct.S.0* [[ARRAYIDX7]] to i8* +// CHECK1-NEXT: [[TMP27:%.*]] = bitcast %struct.S.0* [[TMP24]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN12]], i64 2 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN9]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP25]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done13: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done10: // CHECK1-NEXT: ret void // // @@ -1126,8 +1137,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK3-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 @@ -1136,141 +1146,151 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32 [[TMP4]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP2]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC1]] to i8* -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK3-NEXT: [[TMP12:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S]* [[TMP6]] to %struct.S* +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP8]], %struct.St* noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC1]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 [[TMP17]] -// CHECK3-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* -// CHECK3-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR4]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false) -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP24]] +// CHECK3-NEXT: store i32 [[TMP23]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP25]] +// CHECK3-NEXT: [[TMP26:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* +// CHECK3-NEXT: [[TMP27:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP29]], [[TMP28]] +// CHECK3-NEXT: store i32 [[ADD5]], i32* [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i32 2 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP25]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP33]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done11: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done8: // CHECK3-NEXT: ret void // // @@ -1470,7 +1490,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK3-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 @@ -1479,23 +1499,27 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 // CHECK3-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32 [[TMP4]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP5]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [2 x %struct.S.0]* [[TMP1]], [2 x %struct.S.0]** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP7]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1503,115 +1527,119 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca %struct.S.0*, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP7]], align 4 +// CHECK3-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: [[TMP9:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK3-NEXT: [[TMP10:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 [[TMP10]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = bitcast [2 x %struct.S.0]* [[TMP6]] to %struct.S.0* +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP12]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP11]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.St* noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: [[TMP13:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP13]], %struct.St* noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP17]] -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 [[TMP19]] -// CHECK3-NEXT: [[TMP20:%.*]] = bitcast %struct.S.0* [[ARRAYIDX9]] to i8* -// CHECK3-NEXT: [[TMP21:%.*]] = bitcast %struct.S.0* [[TMP18]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP23]] +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP25]] +// CHECK3-NEXT: [[TMP26:%.*]] = bitcast %struct.S.0* [[ARRAYIDX6]] to i8* +// CHECK3-NEXT: [[TMP27:%.*]] = bitcast %struct.S.0* [[TMP24]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 4, i1 false) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN11]], i32 2 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN8]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP25]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done12: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done9: // CHECK3-NEXT: ret void // // @@ -1840,9 +1868,7 @@ // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK9-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 // CHECK9-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 @@ -1850,34 +1876,31 @@ // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK9-NEXT: store i32* [[CONV2]], i32** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load volatile i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* @g, align 4 +// CHECK9-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]]) +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK9-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 @@ -1886,68 +1909,73 @@ // CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[G_ADDR]] to i32* -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[G]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[G1]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], i32* [[SIVAR]], align 4 +// CHECK9-NEXT: store i32* [[G1]], i32** [[TMP]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] -// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: store i32 1, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK9-NEXT: store volatile i32 1, i32* [[TMP8]], align 4 -// CHECK9-NEXT: store i32 2, i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK9-NEXT: store i32* [[TMP11]], i32** [[TMP10]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store i32* [[CONV2]], i32** [[TMP12]], align 8 +// CHECK9-NEXT: store i32 1, i32* [[G]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK9-NEXT: store volatile i32 1, i32* [[TMP15]], align 4 +// CHECK9-NEXT: store i32 2, i32* [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[G]], i32** [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK9-NEXT: store i32* [[TMP18]], i32** [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[TMP19]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK9-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_lastprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_lastprivate_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_lastprivate_codegen.cpp @@ -163,6 +163,7 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], i64* [[SVAR_ADDR]], align 8 @@ -172,20 +173,25 @@ // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SFVAR_ADDR]] to float* // CHECK1-NEXT: store double* [[CONV1]], double** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, double*, double*, i32*, float*)* @.omp_outlined. to void (i32*, i32*, ...)*), double* [[CONV]], double* [[TMP0]], i32* [[CONV2]], float* [[CONV3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store double* [[CONV]], double** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[TMP2]], double** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[CONV2]], i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[CONV3]], float** [[TMP4]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[G:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -193,97 +199,99 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP4:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store double* [[G]], double** [[G_ADDR]], align 8 -// CHECK1-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store float* [[SFVAR]], float** [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float*, float** [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store double* [[TMP1]], double** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 8 +// CHECK1-NEXT: store double* [[TMP4]], double** [[TMP]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: store double* [[G13]], double** [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[G1]], double** [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: store double 1.000000e+00, double* [[G2]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load double*, double** [[_TMP4]], align 8 -// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP13]], align 8 -// CHECK1-NEXT: store i32 3, i32* [[SVAR5]], align 4 -// CHECK1-NEXT: store float 4.000000e+00, float* [[SFVAR6]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double* [[G2]], double** [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP16:%.*]] = load double*, double** [[_TMP4]], align 8 -// CHECK1-NEXT: store double* [[TMP16]], double** [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store i32* [[SVAR5]], i32** [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store float* [[SFVAR6]], float** [[TMP18]], align 8 +// CHECK1-NEXT: store double 1.000000e+00, double* [[G]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load double*, double** [[_TMP2]], align 8 +// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP18]], align 8 +// CHECK1-NEXT: store i32 3, i32* [[SVAR]], align 4 +// CHECK1-NEXT: store float 4.000000e+00, float* [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store double* [[G]], double** [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load double*, double** [[_TMP2]], align 8 +// CHECK1-NEXT: store double* [[TMP21]], double** [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[SVAR]], i32** [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[SFVAR]], float** [[TMP23]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP22:%.*]] = load double, double* [[G2]], align 8 -// CHECK1-NEXT: store volatile double [[TMP22]], double* [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load double*, double** [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load double, double* [[TMP23]], align 8 -// CHECK1-NEXT: store volatile double [[TMP24]], double* [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[SVAR5]], align 4 -// CHECK1-NEXT: store i32 [[TMP25]], i32* [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load float, float* [[SFVAR6]], align 4 -// CHECK1-NEXT: store float [[TMP26]], float* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load double, double* [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP27]], double* [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load double*, double** [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load double, double* [[TMP28]], align 8 +// CHECK1-NEXT: store volatile double [[TMP29]], double* [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP30]], i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load float, float* [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP31]], float* [[TMP8]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -325,6 +333,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 // CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], i32* [[SVAR_ADDR]], align 4 @@ -339,20 +348,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load double, double* [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], double* [[G13]], align 8 // CHECK3-NEXT: store double* [[G13]], double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, double*, double*, i32*, float*)* @.omp_outlined. to void (i32*, i32*, ...)*), double* [[G2]], double* [[TMP5]], i32* [[SVAR_ADDR]], float* [[CONV]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G2]], double** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load double*, double** [[_TMP4]], align 4 +// CHECK3-NEXT: store double* [[TMP7]], double** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SVAR_ADDR]], i32** [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[CONV]], float** [[TMP9]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca float*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -360,97 +374,99 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 -// CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store float* [[SFVAR]], float** [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load float*, float** [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store double* [[TMP1]], double** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 4 +// CHECK3-NEXT: store double* [[TMP4]], double** [[TMP]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP]], align 4 -// CHECK3-NEXT: store double* [[G13]], double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load double*, double** [[TMP]], align 4 +// CHECK3-NEXT: store double* [[G1]], double** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: store double 1.000000e+00, double* [[G2]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP13]], align 4 -// CHECK3-NEXT: store i32 3, i32* [[SVAR5]], align 4 -// CHECK3-NEXT: store float 4.000000e+00, float* [[SFVAR6]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double* [[G2]], double** [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP16:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: store double* [[TMP16]], double** [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store i32* [[SVAR5]], i32** [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store float* [[SFVAR6]], float** [[TMP18]], align 4 +// CHECK3-NEXT: store double 1.000000e+00, double* [[G]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load double*, double** [[_TMP2]], align 4 +// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP18]], align 4 +// CHECK3-NEXT: store i32 3, i32* [[SVAR]], align 4 +// CHECK3-NEXT: store float 4.000000e+00, float* [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G]], double** [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load double*, double** [[_TMP2]], align 4 +// CHECK3-NEXT: store double* [[TMP21]], double** [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SVAR]], i32** [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[SFVAR]], float** [[TMP23]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP22:%.*]] = load double, double* [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP22]], double* [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP23:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load double, double* [[TMP23]], align 4 -// CHECK3-NEXT: store volatile double [[TMP24]], double* [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP25]], i32* [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load float, float* [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP26]], float* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load double, double* [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP27]], double* [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP28:%.*]] = load double*, double** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load double, double* [[TMP28]], align 4 +// CHECK3-NEXT: store volatile double [[TMP29]], double* [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP30]], i32* [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load float, float* [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP31]], float* [[TMP8]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -605,6 +621,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 @@ -616,21 +633,27 @@ // CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK9-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32* [[CONV]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP3]], i32* [[CONV1]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S* [[TMP7]], %struct.S** [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP8]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -638,31 +661,33 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP2:%.*]] = alloca %struct.S*, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK9-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -672,108 +697,108 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK9-NEXT: store %struct.S* [[VAR5]], %struct.S** [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i64 0, i64 [[IDXPROM9]] -// CHECK9-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX10]] to i8* -// CHECK9-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[TMP16]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP20]], i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK9-NEXT: [[TMP24:%.*]] = bitcast %struct.S* [[ARRAYIDX5]] to i8* +// CHECK9-NEXT: [[TMP25:%.*]] = bitcast %struct.S* [[TMP22]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP24]], i8* align 4 [[TMP25]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK9-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP25]], i32* [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK9-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP28:%.*]] = bitcast [2 x %struct.S]* [[S_ARR4]] to %struct.S* -// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN12]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN12]], [[TMP29]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP31]], i32* [[TMP4]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK9-NEXT: [[TMP33:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP34:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN7]], [[TMP35]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP28]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[TMP30:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK9-NEXT: [[TMP31:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP34]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[TMP36:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK9-NEXT: [[TMP37:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP36]], i8* align 4 [[TMP37]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP29]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done13: -// CHECK9-NEXT: [[TMP32:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[TMP5]] to i8* -// CHECK9-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[TMP32]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[SVAR7]], align 4 -// CHECK9-NEXT: store i32 [[TMP35]], i32* [[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP35]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done8: +// CHECK9-NEXT: [[TMP38:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = bitcast %struct.S* [[TMP11]] to i8* +// CHECK9-NEXT: [[TMP40:%.*]] = bitcast %struct.S* [[TMP38]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP39]], i8* align 4 [[TMP40]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP41]], i32* [[TMP10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN14]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN9]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP36]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP42]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done15: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done10: // CHECK9-NEXT: ret void // // @@ -943,6 +968,7 @@ // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 @@ -952,20 +978,25 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK9-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32* [[CONV]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP3]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [2 x %struct.S.0]* [[TMP1]], [2 x %struct.S.0]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S.0* [[TMP7]], %struct.S.0** [[TMP6]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -973,28 +1004,30 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[_TMP2:%.*]] = alloca %struct.S.0*, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP7]], align 8 +// CHECK9-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -1004,106 +1037,106 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK9-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP9:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP13]], i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i64 0, i64 [[IDXPROM8]] -// CHECK9-NEXT: [[TMP17:%.*]] = bitcast %struct.S.0* [[ARRAYIDX9]] to i8* -// CHECK9-NEXT: [[TMP18:%.*]] = bitcast %struct.S.0* [[TMP15]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP17]], i8* align 4 [[TMP18]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK9-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX5]] to i8* +// CHECK9-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK9-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP24]], i32* [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK9-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP25]], i8* align 4 [[TMP26]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP27:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR4]] to %struct.S.0* -// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN11]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN11]], [[TMP28]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], i32* [[TMP4]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK9-NEXT: [[TMP31:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP32:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR]] to %struct.S.0* +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN7]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN7]], [[TMP33]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP27]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[TMP29:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK9-NEXT: [[TMP30:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP29]], i8* align 4 [[TMP30]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP32]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[TMP34:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK9-NEXT: [[TMP35:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP28]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done12: -// CHECK9-NEXT: [[TMP31:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[TMP4]] to i8* -// CHECK9-NEXT: [[TMP33:%.*]] = bitcast %struct.S.0* [[TMP31]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP33]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done8: +// CHECK9-NEXT: [[TMP36:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP37:%.*]] = bitcast %struct.S.0* [[TMP9]] to i8* +// CHECK9-NEXT: [[TMP38:%.*]] = bitcast %struct.S.0* [[TMP36]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP37]], i8* align 4 [[TMP38]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN13]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN9]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP34]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP39]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done14: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done10: // CHECK9-NEXT: ret void // // @@ -1298,6 +1331,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK11-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 @@ -1307,21 +1341,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 // CHECK11-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32* [[T_VAR_ADDR]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP3]], i32* [[SVAR_ADDR]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[T_VAR_ADDR]], i32** [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP7]], %struct.S** [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32* [[SVAR_ADDR]], i32** [[TMP8]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1329,31 +1369,33 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca %struct.S*, align 4 -// CHECK11-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca %struct.S*, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1363,106 +1405,106 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store %struct.S* [[VAR5]], %struct.S** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC3]], i32 0, i32 [[TMP15]] -// CHECK11-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 [[TMP17]] -// CHECK11-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX9]] to i8* -// CHECK11-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[TMP16]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP21]] +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP23]] +// CHECK11-NEXT: [[TMP24:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* +// CHECK11-NEXT: [[TMP25:%.*]] = bitcast %struct.S* [[TMP22]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP24]], i8* align 4 [[TMP25]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK11-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP25]], i32* [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK11-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP28:%.*]] = bitcast [2 x %struct.S]* [[S_ARR4]] to %struct.S* -// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN11]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN11]], [[TMP29]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP31]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK11-NEXT: [[TMP33:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP34:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN6]], [[TMP35]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP28]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[TMP30:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK11-NEXT: [[TMP31:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP34]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[TMP36:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK11-NEXT: [[TMP37:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 [[TMP37]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP29]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done12: -// CHECK11-NEXT: [[TMP32:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[TMP5]] to i8* -// CHECK11-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[TMP32]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[SVAR7]], align 4 -// CHECK11-NEXT: store i32 [[TMP35]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP35]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP38:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = bitcast %struct.S* [[TMP11]] to i8* +// CHECK11-NEXT: [[TMP40:%.*]] = bitcast %struct.S* [[TMP38]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP39]], i8* align 4 [[TMP40]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP41]], i32* [[TMP10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP36]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP42]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // @@ -1631,6 +1673,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 @@ -1639,20 +1682,25 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 // CHECK11-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32* [[T_VAR_ADDR]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP3]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[T_VAR_ADDR]], i32** [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x %struct.S.0]* [[TMP1]], [2 x %struct.S.0]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S.0* [[TMP7]], %struct.S.0** [[TMP6]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1660,28 +1708,30 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca %struct.S.0*, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP7]], align 4 +// CHECK11-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1691,104 +1741,104 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP9:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC3]], i32 0, i32 [[TMP14]] -// CHECK11-NEXT: store i32 [[TMP13]], i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 [[TMP16]] -// CHECK11-NEXT: [[TMP17:%.*]] = bitcast %struct.S.0* [[ARRAYIDX8]] to i8* -// CHECK11-NEXT: [[TMP18:%.*]] = bitcast %struct.S.0* [[TMP15]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP17]], i8* align 4 [[TMP18]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP19]] +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP21]] +// CHECK11-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX4]] to i8* +// CHECK11-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP24]], i32* [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK11-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP25]], i8* align 4 [[TMP26]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP27:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR4]] to %struct.S.0* -// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN10]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN10]], [[TMP28]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP29]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK11-NEXT: [[TMP31:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP32:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR]] to %struct.S.0* +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN6]], [[TMP33]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP27]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[TMP29:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK11-NEXT: [[TMP30:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP29]], i8* align 4 [[TMP30]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP32]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[TMP34:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK11-NEXT: [[TMP35:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP28]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done11: -// CHECK11-NEXT: [[TMP31:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[TMP4]] to i8* -// CHECK11-NEXT: [[TMP33:%.*]] = bitcast %struct.S.0* [[TMP31]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP33]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP36:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = bitcast %struct.S.0* [[TMP9]] to i8* +// CHECK11-NEXT: [[TMP38:%.*]] = bitcast %struct.S.0* [[TMP36]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP37]], i8* align 4 [[TMP38]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP34]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP39]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp @@ -318,6 +318,7 @@ // CHECK1-NEXT: [[TH_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) // CHECK1-NEXT: store i64 [[TE]], i64* [[TE_ADDR]], align 8 // CHECK1-NEXT: store i64 [[TH]], i64* [[TH_ADDR]], align 8 @@ -330,17 +331,20 @@ // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [100 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV2]], [100 x i32]* [[TMP1]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV2]], i32** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store [100 x i32]* [[TMP1]], [100 x i32]** [[TMP5]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], [100 x i32]* noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -350,87 +354,99 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store [100 x i32]* [[A]], [100 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, [100 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i32* [[TMP0]], [100 x i32]* [[TMP1]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i64 [[TMP19]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[TMP2]], i32** [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store [100 x i32]* [[TMP4]], [100 x i32]** [[TMP25]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK1-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP29]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], [100 x i32]* noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -443,73 +459,77 @@ // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store [100 x i32]* [[A]], [100 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP1]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP8]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB3]], i32 [[TMP20]], i32 2) -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32 2) +// CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK1-NEXT: br i1 [[TMP29]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: // CHECK1-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK1: .cancel.continue: @@ -517,21 +537,21 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: cancel.exit: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) +// CHECK1-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) // CHECK1-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK1: omp.precond.end: // CHECK1-NEXT: br label [[CANCEL_CONT]] @@ -544,21 +564,25 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: store [100 x i32]* [[A]], [100 x i32]** [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [100 x i32]*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[CONV]], [100 x i32]* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store [100 x i32]* [[TMP0]], [100 x i32]** [[TMP2]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], [100 x i32]* noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -568,87 +592,99 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store [100 x i32]* [[A]], [100 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, [100 x i32]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i32* [[TMP0]], [100 x i32]* [[TMP1]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i64 [[TMP19]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[TMP2]], i32** [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store [100 x i32]* [[TMP4]], [100 x i32]** [[TMP25]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK1-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP29]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], [100 x i32]* noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -661,82 +697,86 @@ // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store [100 x i32]* [[A]], [100 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP16]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP1]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP8]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], 1 // CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) +// CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -888,6 +928,7 @@ // CHECK3-NEXT: [[TH_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) // CHECK3-NEXT: store i32 [[TE]], i32* [[TE_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TH]], i32* [[TH_ADDR]], align 4 @@ -897,17 +938,20 @@ // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TE_ADDR]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TH_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [100 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[N_ADDR]], [100 x i32]* [[TMP1]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[N_ADDR]], i32** [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store [100 x i32]* [[TMP1]], [100 x i32]** [[TMP5]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], [100 x i32]* noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -917,85 +961,97 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store [100 x i32]* [[A]], [100 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, [100 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32* [[TMP0]], [100 x i32]* [[TMP1]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[TMP2]], i32** [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store [100 x i32]* [[TMP4]], [100 x i32]** [[TMP23]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) +// CHECK3-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], [100 x i32]* noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1008,70 +1064,74 @@ // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store [100 x i32]* [[A]], [100 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP1]], i32 0, i32 [[TMP18]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP8]], i32 0, i32 [[TMP25]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB3]], i32 [[TMP20]], i32 2) -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32 2) +// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK3: .cancel.exit: // CHECK3-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK3: .cancel.continue: @@ -1079,21 +1139,21 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) +// CHECK3-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: cancel.exit: -// CHECK3-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) +// CHECK3-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) // CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: omp.precond.end: // CHECK3-NEXT: br label [[CANCEL_CONT]] @@ -1106,20 +1166,24 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store [100 x i32]* [[A]], [100 x i32]** [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [100 x i32]*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], [100 x i32]* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[N_ADDR]], i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store [100 x i32]* [[TMP0]], [100 x i32]** [[TMP2]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], [100 x i32]* noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1129,85 +1193,97 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store [100 x i32]* [[A]], [100 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, [100 x i32]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32* [[TMP0]], [100 x i32]* [[TMP1]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[TMP2]], i32** [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store [100 x i32]* [[TMP4]], [100 x i32]** [[TMP23]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) +// CHECK3-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], [100 x i32]* noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1220,79 +1296,83 @@ // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store [100 x i32]* [[A]], [100 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP16]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP1]], i32 0, i32 [[TMP18]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP8]], i32 0, i32 [[TMP25]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP26]], 1 // CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) +// CHECK3-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -1393,24 +1473,29 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1420,90 +1505,103 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32* [[TMP6]], i32** [[TMP28]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1516,84 +1614,88 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1693,23 +1795,28 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1719,88 +1826,101 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP2]], i32** [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32* [[TMP6]], i32** [[TMP26]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) +// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1813,81 +1933,85 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP27]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -1947,83 +2071,97 @@ // CHECK17-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK17-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2033,64 +2171,68 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK17-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK17-NEXT: ret void // // @@ -2148,81 +2290,95 @@ // CHECK19-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP14]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2232,61 +2388,65 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] +// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP17]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK19-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK19-NEXT: ret void // // @@ -2393,24 +2553,29 @@ // CHECK25-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK25-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK25-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK25-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK25-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK25-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK25-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store i32* [[CONV]], i32** [[TMP2]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK25-NEXT: store i64 [[TMP0]], i64* [[TMP3]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK25-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 8 +// CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: ret void // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK25-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK25-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK25-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK25-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2420,90 +2585,103 @@ // CHECK25-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK25-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK25-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK25-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK25-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK25-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK25-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK25-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK25-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK25-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK25-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK25-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK25-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK25-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK25-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK25-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK25-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK25-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK25: omp.precond.then: // CHECK25-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK25-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK25-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK25-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK25-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK25-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK25: cond.true: -// CHECK25-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK25-NEXT: br label [[COND_END:%.*]] // CHECK25: cond.false: -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK25-NEXT: br label [[COND_END]] // CHECK25: cond.end: -// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK25-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK25-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK25-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK25: omp.inner.for.cond: -// CHECK25-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK25-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK25-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK25-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK25-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK25-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK25-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK25-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK25-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK25-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK25-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP24]], align 8 +// CHECK25-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK25-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP25]], align 8 +// CHECK25-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK25-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 8 +// CHECK25-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK25-NEXT: store i64 [[TMP4]], i64* [[TMP27]], align 8 +// CHECK25-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK25-NEXT: store i32* [[TMP6]], i32** [[TMP28]], align 8 +// CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK25-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK25-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: -// CHECK25-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK25-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) // CHECK25-NEXT: br label [[OMP_PRECOND_END]] // CHECK25: omp.precond.end: // CHECK25-NEXT: ret void // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK25-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK25-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK25-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK25-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2516,84 +2694,88 @@ // CHECK25-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK25-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK25-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK25-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK25-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK25-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK25-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK25-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK25-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK25-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK25-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK25-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK25-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK25-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK25-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK25-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK25-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK25-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK25-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK25-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK25-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK25-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK25: omp.precond.then: // CHECK25-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK25-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK25-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK25-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK25-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK25-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK25-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK25-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK25-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK25-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK25-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK25-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK25-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK25-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK25-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK25: cond.true: -// CHECK25-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK25-NEXT: br label [[COND_END:%.*]] // CHECK25: cond.false: -// CHECK25-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK25-NEXT: br label [[COND_END]] // CHECK25: cond.end: -// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK25-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK25-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK25: omp.inner.for.cond: -// CHECK25-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK25-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK25-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK25-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK25-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK25-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK25-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK25-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK25: omp.body.continue: // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK25-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK25-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: -// CHECK25-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK25-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK25-NEXT: br label [[OMP_PRECOND_END]] // CHECK25: omp.precond.end: // CHECK25-NEXT: ret void @@ -2667,6 +2849,7 @@ // CHECK25-NEXT: [[TE_ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[TH_ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) // CHECK25-NEXT: store i64 [[TE]], i64* [[TE_ADDR]], align 8 // CHECK25-NEXT: store i64 [[TH]], i64* [[TH_ADDR]], align 8 @@ -2677,81 +2860,94 @@ // CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP1]]) +// CHECK25-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store [10 x i32]* [[TMP1]], [10 x i32]** [[TMP4]], align 8 +// CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: ret void // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK25-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK25-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK25-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK25-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK25-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK25-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK25-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK25-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK25-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK25-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK25-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK25: cond.true: // CHECK25-NEXT: br label [[COND_END:%.*]] // CHECK25: cond.false: -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK25-NEXT: br label [[COND_END]] // CHECK25: cond.end: -// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK25-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK25-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK25-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK25: omp.inner.for.cond: -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK25-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK25-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK25-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK25-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK25-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK25-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8 +// CHECK25-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK25-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8 +// CHECK25-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK25-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 8 +// CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK25-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK25-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: -// CHECK25-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK25-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK25-NEXT: ret void // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK25-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK25-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK25-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2761,63 +2957,67 @@ // CHECK25-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK25-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK25-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK25-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK25-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK25-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK25-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK25-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK25-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK25-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK25-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK25-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK25-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK25-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK25-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK25-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK25-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK25-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK25-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK25-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK25-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK25-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK25: cond.true: // CHECK25-NEXT: br label [[COND_END:%.*]] // CHECK25: cond.false: -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK25-NEXT: br label [[COND_END]] // CHECK25: cond.end: -// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK25-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK25-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK25: omp.inner.for.cond: -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK25-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK25-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK25-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK25-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK25-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK25-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK25-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK25: omp.body.continue: // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK25-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK25-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: -// CHECK25-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK25-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK25-NEXT: ret void // // @@ -2923,23 +3123,28 @@ // CHECK27-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK27-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK27-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK27-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK27-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK27-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK27-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK27-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store i32* [[N_ADDR]], i32** [[TMP2]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK27-NEXT: store i32 [[TMP0]], i32* [[TMP3]], align 4 +// CHECK27-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK27-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 4 +// CHECK27-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK27-NEXT: ret void // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK27-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK27-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK27-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK27-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2949,88 +3154,101 @@ // CHECK27-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK27-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK27-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK27-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK27-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK27-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK27-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK27-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK27-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK27-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK27-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK27-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK27-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK27-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK27: omp.precond.then: // CHECK27-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK27-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK27-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK27-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK27-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK27-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK27-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK27: cond.true: -// CHECK27-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK27-NEXT: br label [[COND_END:%.*]] // CHECK27: cond.false: -// CHECK27-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK27-NEXT: br label [[COND_END]] // CHECK27: cond.end: -// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK27-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK27: omp.inner.for.cond: -// CHECK27-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK27-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK27-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK27-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK27-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK27-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK27-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP22]], align 4 +// CHECK27-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK27-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP23]], align 4 +// CHECK27-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK27-NEXT: store i32* [[TMP2]], i32** [[TMP24]], align 4 +// CHECK27-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK27-NEXT: store i32 [[TMP4]], i32* [[TMP25]], align 4 +// CHECK27-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK27-NEXT: store i32* [[TMP6]], i32** [[TMP26]], align 4 +// CHECK27-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK27-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK27-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: -// CHECK27-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) +// CHECK27-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK27-NEXT: br label [[OMP_PRECOND_END]] // CHECK27: omp.precond.end: // CHECK27-NEXT: ret void // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK27-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK27-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK27-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK27-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3043,81 +3261,85 @@ // CHECK27-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK27-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK27-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK27-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK27-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK27-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK27-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK27-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK27-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK27-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK27-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK27-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK27-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK27-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK27-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK27-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK27-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK27-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK27-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK27-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK27: omp.precond.then: // CHECK27-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK27-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK27-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK27-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK27-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK27-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK27-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK27-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK27-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK27-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK27: cond.true: -// CHECK27-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK27-NEXT: br label [[COND_END:%.*]] // CHECK27: cond.false: -// CHECK27-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK27-NEXT: br label [[COND_END]] // CHECK27: cond.end: -// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK27-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK27: omp.inner.for.cond: -// CHECK27-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK27-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK27-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK27-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK27-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK27-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] +// CHECK27-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP27]] // CHECK27-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK27: omp.body.continue: // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK27-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK27-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: -// CHECK27-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK27-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK27-NEXT: br label [[OMP_PRECOND_END]] // CHECK27: omp.precond.end: // CHECK27-NEXT: ret void @@ -3189,6 +3411,7 @@ // CHECK27-NEXT: [[TE_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TH_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK27-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) // CHECK27-NEXT: store i32 [[TE]], i32* [[TE_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TH]], i32* [[TH_ADDR]], align 4 @@ -3197,79 +3420,92 @@ // CHECK27-NEXT: [[TMP2:%.*]] = load i32, i32* [[TE_ADDR]], align 4 // CHECK27-NEXT: [[TMP3:%.*]] = load i32, i32* [[TH_ADDR]], align 4 // CHECK27-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK27-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP1]]) +// CHECK27-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store [10 x i32]* [[TMP1]], [10 x i32]** [[TMP4]], align 4 +// CHECK27-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK27-NEXT: ret void // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK27-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK27-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK27-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK27-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK27-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK27-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK27-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK27-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK27-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK27-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK27-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK27-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK27: cond.true: // CHECK27-NEXT: br label [[COND_END:%.*]] // CHECK27: cond.false: -// CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK27-NEXT: br label [[COND_END]] // CHECK27: cond.end: -// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK27-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK27: omp.inner.for.cond: -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK27-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK27-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK27-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK27-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK27-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4 +// CHECK27-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK27-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4 +// CHECK27-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK27-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP14]], align 4 +// CHECK27-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK27-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK27-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: -// CHECK27-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK27-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK27-NEXT: ret void // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK27-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK27-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK27-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3279,60 +3515,64 @@ // CHECK27-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK27-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK27-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK27-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK27-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK27-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK27-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK27-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK27-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK27-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK27-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK27-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK27-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK27-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK27-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK27-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK27: cond.true: // CHECK27-NEXT: br label [[COND_END:%.*]] // CHECK27: cond.false: -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK27-NEXT: br label [[COND_END]] // CHECK27: cond.end: -// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK27-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK27: omp.inner.for.cond: -// CHECK27-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK27-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK27-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK27-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK27-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK27-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK27-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP17]] // CHECK27-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK27: omp.body.continue: // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK27-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK27-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: -// CHECK27-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK27-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK27-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp @@ -151,18 +151,21 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -170,66 +173,77 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 56087, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -241,76 +255,80 @@ // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 56087, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 456 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 456 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP18]], 456 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 456 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL5]] // CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] // CHECK1-NEXT: store i32 [[ADD7]], i32* [[J]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], [123 x [456 x i32]]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[J]], align 4 +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP20]] to i64 // CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [456 x i32], [456 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX9]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -370,18 +388,21 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -389,64 +410,75 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 56087, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP14]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -458,72 +490,76 @@ // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 56087, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 456 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP18]], 456 // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL4]] // CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK3-NEXT: store i32 [[ADD6]], i32* [[J]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], [123 x [456 x i32]]* [[A]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], [456 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP14]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], [123 x [456 x i32]]* [[A]], i32 0, i32 [[TMP19]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[J]], align 4 +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], [456 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP20]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX7]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -670,6 +706,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[M]], i64* [[M_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -680,273 +717,298 @@ // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i64, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]], i32* [[CONV3]], i64 [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32* [[CONV3]], i32** [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP1]], i64* [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32* [[TMP2]], i32** [[TMP7]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[M]], i32** [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[M_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 // CHECK9-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: store i64 [[TMP17]], i64* [[DOTOMP_COMB_UB]], align 8 // CHECK9-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP18]], i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP24]], i64* [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32*, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP21]], i64 [[TMP22]], i32* [[TMP0]], i32* [[TMP1]], i64 [[TMP2]], i64 [[TMP3]], i32* [[TMP4]]) +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP27]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP28]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP2]], i32** [[TMP31]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32* [[TMP4]], i32** [[TMP32]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i64 [[TMP6]], i64* [[TMP33]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store i64 [[TMP8]], i64* [[TMP34]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: store i32* [[TMP10]], i32** [[TMP35]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP37:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP36]], [[TMP37]] // CHECK9-NEXT: store i64 [[ADD]], i64* [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) +// CHECK9-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[M]], i32** [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[M_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32*, i32** [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP18]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 // CHECK9-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP19]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP20]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[TMP12]], i64* [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP13]], i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: store i64 [[TMP21]], i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: store i64 [[TMP22]], i64* [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP23]], i64* [[DOTOMP_UB]], align 8 // CHECK9-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP15]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]] -// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP28]], [[COND_TRUE]] ], [ [[TMP29]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP20]], i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP30]], i64* [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP21]], [[TMP22]] -// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP31]], [[TMP32]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK9-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] -// CHECK9-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 -// CHECK9-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP23]], [[CONV18]] -// CHECK9-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] -// CHECK9-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK9-NEXT: store i32 [[CONV21]], i32* [[I11]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP27]], 0 -// CHECK9-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 -// CHECK9-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] -// CHECK9-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 -// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP26]], [[CONV25]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP28]], 0 -// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 -// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] -// CHECK9-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 -// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] -// CHECK9-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP25]], [[MUL31]] -// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 -// CHECK9-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] -// CHECK9-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 -// CHECK9-NEXT: store i32 [[CONV35]], i32* [[J12]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[I11]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK9-NEXT: [[TMP30:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP3]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i64 [[TMP30]] -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[J12]], align 4 -// CHECK9-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP31]] to i64 -// CHECK9-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i64 [[IDXPROM36]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX37]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK9-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK9-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP33]], [[CONV16]] +// CHECK9-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK9-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK9-NEXT: store i32 [[CONV19]], i32* [[I9]], align 4 +// CHECK9-NEXT: [[TMP35:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK9-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK9-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP36]], [[CONV23]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK9-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] +// CHECK9-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK9-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP35]], [[MUL29]] +// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 +// CHECK9-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] +// CHECK9-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK9-NEXT: store i32 [[CONV33]], i32* [[J10]], align 4 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[I9]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP39]] to i64 +// CHECK9-NEXT: [[TMP40:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP12]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP14]], i64 [[TMP40]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, i32* [[J10]], align 4 +// CHECK9-NEXT: [[IDXPROM34:%.*]] = sext i32 [[TMP41]] to i64 +// CHECK9-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i64 [[IDXPROM34]] +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX35]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP32]], 1 -// CHECK9-NEXT: store i64 [[ADD38]], i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 [[TMP42]], 1 +// CHECK9-NEXT: store i64 [[ADD36]], i64* [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK9-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP44]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -988,18 +1050,21 @@ // CHECK9-SAME: ([10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x [2 x i32]]*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [10 x [2 x i32]]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [10 x [2 x i32]]* [[TMP0]], [10 x [2 x i32]]** [[TMP1]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1007,66 +1072,77 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 19, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x [2 x i32]]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x [2 x i32]]* [[TMP0]]) +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [10 x [2 x i32]]* [[TMP2]], [10 x [2 x i32]]** [[TMP16]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1078,75 +1154,79 @@ // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 19, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 2 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 2 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP18]], 2 // CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 2 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL5]] // CHECK9-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] // CHECK9-NEXT: store i32 [[ADD7]], i32* [[J]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4 -// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[J]], align 4 +// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP20]] to i64 // CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX9]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK9-NEXT: ret void // // @@ -1290,6 +1370,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -1298,275 +1379,300 @@ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32, i32, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32* [[M_ADDR]], i32 [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[M_ADDR]], i32** [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], i32* [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32* [[TMP2]], i32** [[TMP7]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J10:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[M]], i32** [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[M_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 // CHECK11-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK11-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP17]], i64* [[DOTOMP_COMB_UB]], align 8 // CHECK11-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP18]], i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP24]], i64* [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i32 -// CHECK11-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: [[TMP24:%.*]] = trunc i64 [[TMP23]] to i32 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32*, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP22]], i32 [[TMP24]], i32* [[TMP0]], i32* [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32* [[TMP4]]) +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK11-NEXT: [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32 +// CHECK11-NEXT: store i32 [[TMP28]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = trunc i64 [[TMP29]] to i32 +// CHECK11-NEXT: store i32 [[TMP30]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP31]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP32]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP2]], i32** [[TMP33]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32* [[TMP4]], i32** [[TMP34]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[TMP35]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[TMP36]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store i32* [[TMP10]], i32** [[TMP37]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP39:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP38]], [[TMP39]] // CHECK11-NEXT: store i64 [[ADD]], i64* [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) +// CHECK11-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP41]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I13:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J14:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J12:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[M]], i32** [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[M_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP18]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 // CHECK11-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP19]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP20]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[CONV11:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CONV12:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK11-NEXT: store i64 [[CONV11]], i64* [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[CONV12]], i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP21]], i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[CONV9:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[CONV10:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK11-NEXT: store i64 [[CONV9]], i64* [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[CONV10]], i64* [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP15]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]] -// CHECK11-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] +// CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP28]], [[COND_TRUE]] ], [ [[TMP29]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP20]], i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP30]], i64* [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP21]], [[TMP22]] -// CHECK11-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP31]], [[TMP32]] +// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK11-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 -// CHECK11-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] -// CHECK11-NEXT: [[CONV20:%.*]] = sext i32 [[MUL19]] to i64 -// CHECK11-NEXT: [[DIV21:%.*]] = sdiv i64 [[TMP23]], [[CONV20]] -// CHECK11-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]] -// CHECK11-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK11-NEXT: store i32 [[CONV23]], i32* [[I13]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP27]], 0 -// CHECK11-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 -// CHECK11-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] -// CHECK11-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 -// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP26]], [[CONV27]] -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP28]], 0 -// CHECK11-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 -// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] -// CHECK11-NEXT: [[CONV32:%.*]] = sext i32 [[MUL31]] to i64 -// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[DIV28]], [[CONV32]] -// CHECK11-NEXT: [[SUB34:%.*]] = sub nsw i64 [[TMP25]], [[MUL33]] -// CHECK11-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 -// CHECK11-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] -// CHECK11-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 -// CHECK11-NEXT: store i32 [[CONV37]], i32* [[J14]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[I13]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = mul nsw i32 [[TMP29]], [[TMP3]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 [[TMP30]] -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[J14]], align 4 -// CHECK11-NEXT: [[ARRAYIDX38:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i32 [[TMP31]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX38]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK11-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] +// CHECK11-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 +// CHECK11-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP33]], [[CONV18]] +// CHECK11-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] +// CHECK11-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK11-NEXT: store i32 [[CONV21]], i32* [[I11]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK11-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 +// CHECK11-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] +// CHECK11-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 +// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP36]], [[CONV25]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 +// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] +// CHECK11-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 +// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] +// CHECK11-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP35]], [[MUL31]] +// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 +// CHECK11-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] +// CHECK11-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 +// CHECK11-NEXT: store i32 [[CONV35]], i32* [[J12]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[I11]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = mul nsw i32 [[TMP39]], [[TMP12]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP14]], i32 [[TMP40]] +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[J12]], align 4 +// CHECK11-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i32 [[TMP41]] +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX36]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP32]], 1 -// CHECK11-NEXT: store i64 [[ADD39]], i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[ADD37:%.*]] = add nsw i64 [[TMP42]], 1 +// CHECK11-NEXT: store i64 [[ADD37]], i64* [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK11-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP44]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -1608,18 +1714,21 @@ // CHECK11-SAME: ([10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x [2 x i32]]*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [10 x [2 x i32]]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [10 x [2 x i32]]* [[TMP0]], [10 x [2 x i32]]** [[TMP1]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1627,64 +1736,75 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 19, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x [2 x i32]]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x [2 x i32]]* [[TMP0]]) +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [10 x [2 x i32]]* [[TMP2]], [10 x [2 x i32]]** [[TMP14]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1696,71 +1816,75 @@ // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 19, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 2 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP18]], 2 // CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL4]] // CHECK11-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK11-NEXT: store i32 [[ADD6]], i32* [[J]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP14]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP6]], i32 0, i32 [[TMP19]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[J]], align 4 +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP20]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX7]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp @@ -142,89 +142,105 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [2 x i32]*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[X]], i64* [[X_ADDR]], align 8 // CHECK1-NEXT: store [2 x i32]* [[A]], [2 x i32]** [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[X_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32* [[CONV]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP2]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[A:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[A]], [2 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[X_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]], [2 x i32]* [[TMP0]], i32* [[TMP1]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x i32]* [[TMP2]], [2 x i32]** [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i32* [[TMP4]], i32** [[TMP19]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[A:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -234,66 +250,70 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[A]], [2 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[X_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP3]] to i32 +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP12]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) // CHECK1-NEXT: ret void // // @@ -344,89 +364,105 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [2 x i32]*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i64 [[X]], i64* [[X_ADDR]], align 8 // CHECK1-NEXT: store [2 x i32]* [[A]], [2 x i32]** [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[X_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32* [[CONV]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP2]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[A:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[A]], [2 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[X_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]], [2 x i32]* [[TMP0]], i32* [[TMP1]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x i32]* [[TMP2]], [2 x i32]** [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i32* [[TMP4]], i32** [[TMP19]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[A:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -436,66 +472,70 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[A]], [2 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[X_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP3]] to i32 +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP12]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) // CHECK1-NEXT: ret void // // @@ -560,86 +600,102 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [2 x i32]*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[X]], i32* [[X_ADDR]], align 4 // CHECK3-NEXT: store [2 x i32]* [[A]], [2 x i32]** [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32* [[X_ADDR]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[X_ADDR]], i32** [[TMP2]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[A:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK3-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store [2 x i32]* [[A]], [2 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[X_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP9]], i32 [[TMP10]], [2 x i32]* [[TMP0]], i32* [[TMP1]]) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [2 x i32]* [[TMP2]], [2 x i32]** [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store i32* [[TMP4]], i32** [[TMP17]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[A:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK3-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -649,63 +705,67 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store [2 x i32]* [[A]], [2 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[X_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: store i32 [[TMP12]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP6]], i32 0, i32 [[TMP20]] +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) // CHECK3-NEXT: ret void // // @@ -755,86 +815,102 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [2 x i32]*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[X]], i32* [[X_ADDR]], align 4 // CHECK3-NEXT: store [2 x i32]* [[A]], [2 x i32]** [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32* [[X_ADDR]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[X_ADDR]], i32** [[TMP2]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[A:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK3-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store [2 x i32]* [[A]], [2 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[X_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP9]], i32 [[TMP10]], [2 x i32]* [[TMP0]], i32* [[TMP1]]) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [2 x i32]* [[TMP2]], [2 x i32]** [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store i32* [[TMP4]], i32** [[TMP17]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[A:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK3-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -844,63 +920,67 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store [2 x i32]* [[A]], [2 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[X_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: store i32 [[TMP12]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP6]], i32 0, i32 [[TMP20]] +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) // CHECK3-NEXT: ret void // // @@ -934,89 +1014,105 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [2 x i32]*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[X]], i64* [[X_ADDR]], align 8 // CHECK9-NEXT: store [2 x i32]* [[A]], [2 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[X_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32* [[CONV]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP2]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[A:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[A]], [2 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[X_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]], [2 x i32]* [[TMP0]], i32* [[TMP1]]) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK9-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [2 x i32]* [[TMP2]], [2 x i32]** [[TMP18]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32* [[TMP4]], i32** [[TMP19]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[A:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[X_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1024,76 +1120,80 @@ // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[A]], [2 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[X]], i32** [[X_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[X_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP3]] to i32 +// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP12]], i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP14]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: store i32* [[I]], i32** [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store i32* [[TMP1]], i32** [[TMP16]], align 8 -// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP19]], i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store [2 x i32]* [[TMP6]], [2 x i32]** [[TMP21]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: store i32* [[I]], i32** [[TMP22]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP8]], i32** [[TMP23]], align 8 +// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) // CHECK9-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp @@ -236,83 +236,97 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -322,64 +336,68 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -387,83 +405,97 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -473,64 +505,68 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -538,103 +574,117 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 123 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i64 [[TMP12]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP15]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], 122 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 122 // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK1: cond.true5: // CHECK1-NEXT: br label [[COND_END7:%.*]] // CHECK1: cond.false6: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END7]] // CHECK1: cond.end7: -// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP18]], [[COND_FALSE6]] ] +// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK1-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -644,64 +694,68 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -805,81 +859,95 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP14]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -889,61 +957,65 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -951,81 +1023,95 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP14]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1035,61 +1121,65 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1097,101 +1187,115 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 123 // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP13]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], 122 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 122 // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK3: cond.true5: // CHECK3-NEXT: br label [[COND_END7:%.*]] // CHECK3: cond.false6: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END7]] // CHECK3: cond.end7: -// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP16]], [[COND_FALSE6]] ] +// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ] // CHECK3-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1201,61 +1305,65 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1501,24 +1609,29 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1528,90 +1641,103 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32* [[TMP6]], i32** [[TMP28]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1624,84 +1750,88 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1713,24 +1843,29 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1740,90 +1875,103 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32* [[TMP6]], i32** [[TMP28]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1836,84 +1984,88 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1927,7 +2079,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store i64 [[M]], i64* [[M_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -1938,23 +2090,26 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -1964,123 +2119,135 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]]) +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK9-NEXT: store i64 [[TMP24]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP2]], i32** [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32* [[TMP6]], i32** [[TMP31]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP33]], i32* [[TMP32]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] -// CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] -// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] -// CHECK9: cond.true12: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: br label [[COND_END14:%.*]] -// CHECK9: cond.false13: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: br label [[COND_END14]] -// CHECK9: cond.end14: -// CHECK9-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE12]] ], [ [[TMP32]], [[COND_FALSE13]] ] -// CHECK9-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] +// CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] +// CHECK9-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] +// CHECK9: cond.true11: +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: br label [[COND_END13:%.*]] +// CHECK9: cond.false12: +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: br label [[COND_END13]] +// CHECK9: cond.end13: +// CHECK9-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE11]] ], [ [[TMP43]], [[COND_FALSE12]] ] +// CHECK9-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP44]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP46]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2090,89 +2257,94 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 8 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 +// CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -2272,83 +2444,97 @@ // CHECK9-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2358,63 +2544,67 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK9-NEXT: ret void // // @@ -2422,83 +2612,97 @@ // CHECK9-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2508,63 +2712,67 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK9-NEXT: ret void // // @@ -2574,122 +2782,133 @@ // CHECK9-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store i64 [[M]], i64* [[M_ADDR]], align 8 // CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK9-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP5]]) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP11]], 10 // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK9-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], i32* [[TMP19]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.12*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP20]], 9 -// CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] -// CHECK9: cond.true6: -// CHECK9-NEXT: br label [[COND_END8:%.*]] -// CHECK9: cond.false7: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: br label [[COND_END8]] -// CHECK9: cond.end8: -// CHECK9-NEXT: [[COND9:%.*]] = phi i32 [ 9, [[COND_TRUE6]] ], [ [[TMP21]], [[COND_FALSE7]] ] -// CHECK9-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP27]], 9 +// CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK9: cond.true5: +// CHECK9-NEXT: br label [[COND_END7:%.*]] +// CHECK9: cond.false6: +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: br label [[COND_END7]] +// CHECK9: cond.end7: +// CHECK9-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP28]], [[COND_FALSE6]] ] +// CHECK9-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2699,65 +2918,70 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK9-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) // CHECK9-NEXT: ret void // // @@ -3001,23 +3225,28 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3027,88 +3256,101 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP2]], i32** [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32* [[TMP6]], i32** [[TMP26]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) +// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3121,81 +3363,85 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP27]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -3207,23 +3453,28 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3233,88 +3484,101 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP2]], i32** [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32* [[TMP6]], i32** [[TMP26]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) +// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3327,81 +3591,85 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP27]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -3415,7 +3683,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -3424,22 +3692,26 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -3449,119 +3721,133 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]]) +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP24]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP2]], i32** [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32* [[TMP6]], i32** [[TMP29]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP31]], i32* [[TMP30]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] // CHECK11-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK11: cond.true11: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END13:%.*]] // CHECK11: cond.false12: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END13]] // CHECK11: cond.end13: -// CHECK11-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE11]] ], [ [[TMP30]], [[COND_FALSE12]] ] +// CHECK11-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE11]] ], [ [[TMP41]], [[COND_FALSE12]] ] // CHECK11-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP42]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP44]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -3574,82 +3860,88 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP29]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -3748,81 +4040,95 @@ // CHECK11-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP14]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3832,60 +4138,64 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP17]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK11-NEXT: ret void // // @@ -3893,81 +4203,95 @@ // CHECK11-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP14]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3977,60 +4301,64 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP17]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK11-NEXT: ret void // // @@ -4040,116 +4368,130 @@ // CHECK11-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK11-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 // CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP5]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP11]], 10 // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[TMP17]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP25]], 9 // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK11: cond.true5: // CHECK11-NEXT: br label [[COND_END7:%.*]] // CHECK11: cond.false6: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END7]] // CHECK11: cond.end7: -// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ] +// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP26]], [[COND_FALSE6]] ] // CHECK11-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4159,61 +4501,67 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP19]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp @@ -370,8 +370,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 @@ -382,142 +381,159 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i64 [[TMP4]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP2]], i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S]* [[TMP6]] to %struct.S* +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP8]], %struct.St* noundef [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], [2 x i32]* [[VEC2]], i64 [[TMP19]], [2 x %struct.S]* [[S_ARR3]], %struct.S* [[VAR5]], i64 [[TMP21]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP30]], i32* [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP34]], i32* [[TMP33]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i64 2 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN4]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP26]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP39]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done11: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done5: // CHECK1-NEXT: ret void // // @@ -555,144 +571,146 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP8:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 8 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP16]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP17]], i8* align 4 [[TMP18]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP19:%.*]] = bitcast [2 x %struct.S]* [[TMP10]] to %struct.S* +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP20]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP19]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done6: -// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP8]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR7]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* noundef [[AGG_TMP8]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP8]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP20]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP12]], %struct.St* noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP22]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP23]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK1-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX11]] to i8* -// CHECK1-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR7]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP29]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[ARRAYIDX6]] to i8* +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP34]] +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN14]], i64 2 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN9]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP27]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP39]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done15: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done10: // CHECK1-NEXT: ret void // // @@ -700,35 +718,35 @@ // CHECK1-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.2*, align 8 // CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x i8*], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x i8*], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x i8*], align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) -// CHECK1-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP1]], %struct.S.0** [[TMP]], align 8 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) +// CHECK1-NEXT: store %struct.S.2* [[TEST]], %struct.S.2** [[VAR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S.2*, %struct.S.2** [[VAR]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[TMP1]], %struct.S.2** [[TMP]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_VAR]], align 4 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP2]], i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64* // CHECK1-NEXT: store i64 [[TMP3]], i64* [[TMP8]], align 8 @@ -746,19 +764,19 @@ // CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 // CHECK1-NEXT: store i8* null, i8** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.0]** -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.2]** +// CHECK1-NEXT: store [2 x %struct.S.2]* [[S_ARR]], [2 x %struct.S.2]** [[TMP18]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to [2 x %struct.S.0]** -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to [2 x %struct.S.2]** +// CHECK1-NEXT: store [2 x %struct.S.2]* [[S_ARR]], [2 x %struct.S.2]** [[TMP20]], align 8 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 // CHECK1-NEXT: store i8* null, i8** [[TMP21]], align 8 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.0** -// CHECK1-NEXT: store %struct.S.0* [[TMP5]], %struct.S.0** [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.2** +// CHECK1-NEXT: store %struct.S.2* [[TMP5]], %struct.S.2** [[TMP23]], align 8 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to %struct.S.0** -// CHECK1-NEXT: store %struct.S.0* [[TMP6]], %struct.S.0** [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to %struct.S.2** +// CHECK1-NEXT: store %struct.S.2* [[TMP6]], %struct.S.2** [[TMP25]], align 8 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 // CHECK1-NEXT: store i8* null, i8** [[TMP26]], align 8 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 @@ -768,21 +786,21 @@ // CHECK1-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 // CHECK1-NEXT: br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56(i64 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP4]]) #[[ATTR2]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56(i64 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.2]* [[S_ARR]], %struct.S.2* [[TMP4]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done2: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK1-NEXT: ret i32 [[TMP32]] // @@ -830,365 +848,385 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) +// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 -// CHECK1-SAME: (i64 noundef [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i64 noundef [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.2]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.2]*, align 8 +// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store [2 x %struct.S.2]* [[S_ARR]], [2 x %struct.S.2]** [[S_ARR_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[VAR]], %struct.S.2** [[VAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i64 [[TMP4]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP5]]) +// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.2]*, [2 x %struct.S.2]** [[S_ARR_ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.2*, %struct.S.2** [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[TMP2]], %struct.S.2** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x %struct.S.2]* [[TMP1]], [2 x %struct.S.2]** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[TMP8]], %struct.S.2** [[TMP7]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca %struct.S.2*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.2]*, [2 x %struct.S.2]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP7]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[TMP8]], %struct.S.2** [[TMP]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP9]], i8* align 4 [[TMP10]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [2 x %struct.S.2]* [[TMP6]] to %struct.S.2* +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.2* [[ARRAY_BEGIN]], [[TMP12]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP11]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.2* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.St* noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.2* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: [[TMP13:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[TMP13]], %struct.St* noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: store %struct.S.2* [[VAR]], %struct.S.2** [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], [2 x i32]* [[VEC2]], i64 [[TMP20]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP21]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK1-NEXT: store i64 [[TMP22]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK1-NEXT: store i64 [[TMP24]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store [2 x %struct.S.2]* [[S_ARR]], [2 x %struct.S.2]** [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP32:%.*]] = load %struct.S.2*, %struct.S.2** [[_TMP4]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[TMP32]], %struct.S.2** [[TMP31]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN10]], i64 2 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN6]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP26]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done11: +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP37]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done7: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1ERKS0_2St -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC2ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP0]], %struct.St* noundef [[T]]) +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[S]], %struct.S.2** [[S_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S.2*, %struct.S.2** [[S_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC2ERKS0_2St(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[TMP0]], %struct.St* noundef [[T]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP8:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP9:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP4:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP5:%.*]] = alloca %struct.S.2*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load [2 x %struct.S.2]*, [2 x %struct.S.2]** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP11]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[TMP12]], %struct.S.2** [[TMP]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP15]], i8* align 4 [[TMP16]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast [2 x %struct.S.2]* [[TMP10]] to %struct.S.2* +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.2* [[ARRAY_BEGIN]], [[TMP18]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP17]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.2* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done6: -// CHECK1-NEXT: [[TMP9:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP8]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR7]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP9]], %struct.St* noundef [[AGG_TMP8]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP8]]) #[[ATTR2]] -// CHECK1-NEXT: store %struct.S.0* [[VAR7]], %struct.S.0** [[_TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.2* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP18]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done3: +// CHECK1-NEXT: [[TMP19:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[TMP19]], %struct.St* noundef [[AGG_TMP4]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) #[[ATTR2]] +// CHECK1-NEXT: store %struct.S.2* [[VAR]], %struct.S.2** [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP21]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP22]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP9]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i64 0, i64 [[IDXPROM11]] -// CHECK1-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX12]] to i8* -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP28]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load %struct.S.2*, %struct.S.2** [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i64 0, i64 [[IDXPROM7]] +// CHECK1-NEXT: [[TMP32:%.*]] = bitcast %struct.S.2* [[ARRAYIDX8]] to i8* +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast %struct.S.2* [[TMP30]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP34]], 1 +// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN14]], i64 2 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN10]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP27]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done15: +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP37]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done11: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], %struct.S.2* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load volatile i32, i32* @g, align 4 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], %struct.S.2* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load volatile i32, i32* @g, align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1197,16 +1235,16 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2ERKS0_2St -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 -// CHECK1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[S]], %struct.S.2** [[S_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], %struct.S.2* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S.2*, %struct.S.2** [[S_ADDR]], align 8 +// CHECK1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[TMP0]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[F2]], align 4 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.St* [[T]], i32 0, i32 0 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 @@ -1216,11 +1254,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -1432,8 +1470,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK3-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 @@ -1442,134 +1479,157 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32 [[TMP4]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP2]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC1]] to i8* -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK3-NEXT: [[TMP12:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S]* [[TMP6]] to %struct.S* +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP8]], %struct.St* noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP18]], i32* [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], [2 x i32]* [[VEC1]], i32 [[TMP17]], [2 x %struct.S]* [[S_ARR2]], %struct.S* [[VAR4]], i32 [[TMP19]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP28]], i32* [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP29]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store %struct.S* [[VAR]], %struct.S** [[TMP30]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP32]], i32* [[TMP31]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i32 2 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN4]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP24]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP37]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done8: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done5: // CHECK3-NEXT: ret void // // @@ -1607,138 +1667,142 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC1]] to i8* -// CHECK3-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: [[TMP17:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK3-NEXT: [[TMP18:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP17]], i8* align 4 [[TMP18]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP19:%.*]] = bitcast [2 x %struct.S]* [[TMP10]] to %struct.S* +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP20]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP19]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP20]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP12]], %struct.St* noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP22]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP23]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC1]], i32 0, i32 [[TMP18]] -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 [[TMP19]] -// CHECK3-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* -// CHECK3-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR4]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP30]] +// CHECK3-NEXT: store i32 [[TMP29]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP31]] +// CHECK3-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* +// CHECK3-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP35]], [[TMP34]] +// CHECK3-NEXT: store i32 [[ADD5]], i32* [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP36]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i32 2 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP27]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP39]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done11: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done8: // CHECK3-NEXT: ret void // // @@ -1746,34 +1810,34 @@ // CHECK3-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x i8*], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x i8*], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x i8*], align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK3-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i32 8, i1 false) -// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i32 1 -// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) -// CHECK3-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[TMP1]], %struct.S.0** [[TMP]], align 4 +// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i32 1 +// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK3-NEXT: store %struct.S.1* [[TEST]], %struct.S.1** [[VAR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[TMP1]], %struct.S.1** [[TMP]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_VAR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[T_VAR_CASTED]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32* // CHECK3-NEXT: store i32 [[TMP3]], i32* [[TMP8]], align 4 @@ -1791,19 +1855,19 @@ // CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 // CHECK3-NEXT: store i8* null, i8** [[TMP16]], align 4 // CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.0]** -// CHECK3-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP18]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.1]** +// CHECK3-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP18]], align 4 // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to [2 x %struct.S.0]** -// CHECK3-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to [2 x %struct.S.1]** +// CHECK3-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP20]], align 4 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 // CHECK3-NEXT: store i8* null, i8** [[TMP21]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.0** -// CHECK3-NEXT: store %struct.S.0* [[TMP5]], %struct.S.0** [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.1** +// CHECK3-NEXT: store %struct.S.1* [[TMP5]], %struct.S.1** [[TMP23]], align 4 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to %struct.S.0** -// CHECK3-NEXT: store %struct.S.0* [[TMP6]], %struct.S.0** [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to %struct.S.1** +// CHECK3-NEXT: store %struct.S.1* [[TMP6]], %struct.S.1** [[TMP25]], align 4 // CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 // CHECK3-NEXT: store i8* null, i8** [[TMP26]], align 4 // CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 @@ -1813,21 +1877,21 @@ // CHECK3-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 // CHECK3-NEXT: br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56(i32 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP4]]) #[[ATTR2]] +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56(i32 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.1]* [[S_ARR]], %struct.S.1* [[TMP4]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK3: arraydestroy.done2: -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK3-NEXT: ret i32 [[TMP32]] // @@ -1875,354 +1939,378 @@ // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK3-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 -// CHECK3-SAME: (i32 noundef [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32 noundef [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.1]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.1]*, align 4 +// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[S_ARR_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[VAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32 [[TMP4]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP5]]) +// CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[S_ARR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[TMP2]], %struct.S.1** [[TMP]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [2 x %struct.S.1]* [[TMP1]], [2 x %struct.S.1]** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[TMP8]], %struct.S.1** [[TMP7]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP7]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[TMP8]], %struct.S.1** [[TMP]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: [[TMP9:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK3-NEXT: [[TMP10:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 [[TMP10]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = bitcast [2 x %struct.S.1]* [[TMP6]] to %struct.S.1* +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN]], [[TMP12]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP11]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.St* noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: [[TMP13:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP13]], %struct.St* noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], [2 x i32]* [[VEC2]], i32 [[TMP18]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP19]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP30:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP4]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[TMP30]], %struct.S.1** [[TMP29]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN9]], i32 2 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP24]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done10: +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP35]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done7: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC1ERKS0_2St -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 4 -// CHECK3-NEXT: call void @_ZN1SIiEC2ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP0]], %struct.St* noundef [[T]]) +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[S]], %struct.S.1** [[S_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.S.1*, %struct.S.1** [[S_ADDR]], align 4 +// CHECK3-NEXT: call void @_ZN1SIiEC2ERKS0_2St(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP0]], %struct.St* noundef [[T]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP11]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[TMP12]], %struct.S.1** [[TMP]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK3-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK3-NEXT: [[TMP16:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP15]], i8* align 4 [[TMP16]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = bitcast [2 x %struct.S.1]* [[TMP10]] to %struct.S.1* +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN]], [[TMP18]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP17]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: [[TMP9:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP9]], %struct.St* noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP18]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: [[TMP19:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP19]], %struct.St* noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP21]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP22]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP19]] -// CHECK3-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX9]] to i8* -// CHECK3-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP29]] +// CHECK3-NEXT: store i32 [[TMP28]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 [[TMP31]] +// CHECK3-NEXT: [[TMP32:%.*]] = bitcast %struct.S.1* [[ARRAYIDX6]] to i8* +// CHECK3-NEXT: [[TMP33:%.*]] = bitcast %struct.S.1* [[TMP30]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i32 4, i1 false) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], 1 +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN11]], i32 2 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN8]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP27]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done12: +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP37]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done9: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load volatile i32, i32* @g, align 4 // CHECK3-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load volatile i32, i32* @g, align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -2231,16 +2319,16 @@ // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC2ERKS0_2St -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 4 -// CHECK3-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[S]], %struct.S.1** [[S_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.S.1*, %struct.S.1** [[S_ADDR]], align 4 +// CHECK3-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[TMP0]], i32 0, i32 0 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[F2]], align 4 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.St* [[T]], i32 0, i32 0 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 @@ -2250,11 +2338,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -2404,9 +2492,7 @@ // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK9-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 // CHECK9-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 @@ -2414,201 +2500,211 @@ // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK9-NEXT: store i32* [[CONV2]], i32** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load volatile i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* @g, align 4 +// CHECK9-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]]) +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK9-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[G_ADDR]] to i32* -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[G]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[G1]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], i32* [[SIVAR]], align 4 +// CHECK9-NEXT: store i32* [[G1]], i32** [[TMP]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] -// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[G_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load volatile i32, i32* [[TMP13]], align 4 -// CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[G1_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[CONV7:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]) +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK9-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK9-NEXT: store i64 [[TMP17]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP18]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP19]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[G]], align 4 +// CHECK9-NEXT: store i32 [[TMP21]], i32* [[TMP20]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* @g, align 4 +// CHECK9-NEXT: store i32 [[TMP23]], i32* [[TMP22]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK9-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[G_ADDR]] to i32* -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], i32* [[G]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: store i32 [[TMP8]], i32* [[G1]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 8 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[SIVAR]], align 4 +// CHECK9-NEXT: store i32* [[G1]], i32** [[TMP]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP11]] to i32 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 +// CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: store i32 1, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK9-NEXT: store volatile i32 1, i32* [[TMP10]], align 4 -// CHECK9-NEXT: store i32 2, i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK9-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8 -// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) +// CHECK9-NEXT: store i32 1, i32* [[G]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK9-NEXT: store volatile i32 1, i32* [[TMP21]], align 4 +// CHECK9-NEXT: store i32 2, i32* [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[G]], i32** [[TMP22]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK9-NEXT: store i32* [[TMP24]], i32** [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[TMP25]], align 8 +// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]]) // CHECK9-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp @@ -140,77 +140,89 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l48 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -220,139 +232,155 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l52 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP14]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -362,43 +390,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: call void @_Z9gtid_testv() @@ -406,14 +438,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -483,77 +515,89 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -563,43 +607,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn4v() @@ -607,96 +655,108 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l89 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP14]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -706,43 +766,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn5v() @@ -750,14 +814,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -766,107 +830,120 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK1-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* -// CHECK1-NEXT: [[FROMBOOL3:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL3]], i8* [[CONV2]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i64 [[TMP2]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK1-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL2]], i8* [[TMP1]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK1-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -876,43 +953,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn6v() @@ -920,14 +1001,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -995,77 +1076,89 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l62 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1075,43 +1168,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn1v() @@ -1119,96 +1216,108 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l67 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..12 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.11*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TMP14]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1218,43 +1327,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn2v() @@ -1262,14 +1375,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -1278,107 +1391,120 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 1 // CHECK1-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* -// CHECK1-NEXT: [[FROMBOOL3:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL3]], i8* [[CONV2]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP2]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK1-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL2]], i8* [[TMP1]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.13*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK1-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1388,43 +1514,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn3v() @@ -1432,14 +1562,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp @@ -201,6 +201,7 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], i64* [[SVAR_ADDR]], align 8 @@ -210,20 +211,25 @@ // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SFVAR_ADDR]] to float* // CHECK1-NEXT: store double* [[CONV1]], double** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, double*, double*, i32*, float*)* @.omp_outlined. to void (i32*, i32*, ...)*), double* [[CONV]], double* [[TMP0]], i32* [[CONV2]], float* [[CONV3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store double* [[CONV]], double** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[TMP2]], double** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[CONV2]], i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[CONV3]], float** [[TMP4]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[G:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -231,98 +237,112 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP4:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store double* [[G]], double** [[G_ADDR]], align 8 -// CHECK1-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store float* [[SFVAR]], float** [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float*, float** [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store double* [[TMP1]], double** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 8 +// CHECK1-NEXT: store double* [[TMP4]], double** [[TMP]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: store double* [[G13]], double** [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[G1]], double** [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load double*, double** [[_TMP4]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, double*, double*, i32*, float*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP13]], i64 [[TMP15]], double* [[G2]], double* [[TMP16]], i32* [[SVAR5]], float* [[SFVAR6]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// CHECK1-NEXT: store i64 [[TMP18]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK1-NEXT: store i64 [[TMP20]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store double* [[G]], double** [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP25:%.*]] = load double*, double** [[_TMP2]], align 8 +// CHECK1-NEXT: store double* [[TMP25]], double** [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store i32* [[SVAR]], i32** [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store float* [[SFVAR]], float** [[TMP27]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK1-NEXT: br i1 [[TMP31]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP21:%.*]] = load double, double* [[G2]], align 8 -// CHECK1-NEXT: store volatile double [[TMP21]], double* [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load double*, double** [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load double, double* [[TMP22]], align 8 -// CHECK1-NEXT: store volatile double [[TMP23]], double* [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[SVAR5]], align 4 -// CHECK1-NEXT: store i32 [[TMP24]], i32* [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load float, float* [[SFVAR6]], align 4 -// CHECK1-NEXT: store float [[TMP25]], float* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load double, double* [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP32]], double* [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load double*, double** [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load double, double* [[TMP33]], align 8 +// CHECK1-NEXT: store volatile double [[TMP34]], double* [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP35]], i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load float, float* [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP36]], float* [[TMP8]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[G:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -330,105 +350,109 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store double* [[G]], double** [[G_ADDR]], align 8 -// CHECK1-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store float* [[SFVAR]], float** [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float*, float** [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store double* [[TMP1]], double** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double*, double** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP11]], align 8 +// CHECK1-NEXT: store double* [[TMP8]], double** [[TMP]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: store double* [[G14]], double** [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[G1]], double** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: store double 1.000000e+00, double* [[G3]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load double*, double** [[_TMP5]], align 8 -// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP15]], align 8 -// CHECK1-NEXT: store i32 3, i32* [[SVAR6]], align 4 -// CHECK1-NEXT: store float 4.000000e+00, float* [[SFVAR7]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double* [[G3]], double** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP18:%.*]] = load double*, double** [[_TMP5]], align 8 -// CHECK1-NEXT: store double* [[TMP18]], double** [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store i32* [[SVAR6]], i32** [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store float* [[SFVAR7]], float** [[TMP20]], align 8 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) +// CHECK1-NEXT: store double 1.000000e+00, double* [[G]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load double*, double** [[_TMP3]], align 8 +// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP24]], align 8 +// CHECK1-NEXT: store i32 3, i32* [[SVAR]], align 4 +// CHECK1-NEXT: store float 4.000000e+00, float* [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store double* [[G]], double** [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load double*, double** [[_TMP3]], align 8 +// CHECK1-NEXT: store double* [[TMP27]], double** [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[SVAR]], i32** [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[SFVAR]], float** [[TMP29]], align 8 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP24:%.*]] = load double, double* [[G3]], align 8 -// CHECK1-NEXT: store volatile double [[TMP24]], double* [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load double*, double** [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load double, double* [[TMP25]], align 8 -// CHECK1-NEXT: store volatile double [[TMP26]], double* [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[SVAR6]], align 4 -// CHECK1-NEXT: store i32 [[TMP27]], i32* [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load float, float* [[SFVAR7]], align 4 -// CHECK1-NEXT: store float [[TMP28]], float* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load double, double* [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP33]], double* [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load double*, double** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load double, double* [[TMP34]], align 8 +// CHECK1-NEXT: store volatile double [[TMP35]], double* [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP36]], i32* [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load float, float* [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP37]], float* [[TMP12]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -470,6 +494,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 // CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], i32* [[SVAR_ADDR]], align 4 @@ -484,20 +509,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load double, double* [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], double* [[G13]], align 8 // CHECK3-NEXT: store double* [[G13]], double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, double*, double*, i32*, float*)* @.omp_outlined. to void (i32*, i32*, ...)*), double* [[G2]], double* [[TMP5]], i32* [[SVAR_ADDR]], float* [[CONV]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G2]], double** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load double*, double** [[_TMP4]], align 4 +// CHECK3-NEXT: store double* [[TMP7]], double** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SVAR_ADDR]], i32** [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[CONV]], float** [[TMP9]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca float*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -505,96 +535,110 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 -// CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store float* [[SFVAR]], float** [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load float*, float** [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store double* [[TMP1]], double** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 4 +// CHECK3-NEXT: store double* [[TMP4]], double** [[TMP]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP]], align 4 -// CHECK3-NEXT: store double* [[G13]], double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load double*, double** [[TMP]], align 4 +// CHECK3-NEXT: store double* [[G1]], double** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, double*, double*, i32*, float*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP12]], i32 [[TMP13]], double* [[G2]], double* [[TMP14]], i32* [[SVAR5]], float* [[SFVAR6]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store double* [[G]], double** [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP23:%.*]] = load double*, double** [[_TMP2]], align 4 +// CHECK3-NEXT: store double* [[TMP23]], double** [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store i32* [[SVAR]], i32** [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store float* [[SFVAR]], float** [[TMP25]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP19:%.*]] = load double, double* [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP19]], double* [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP20:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load double, double* [[TMP20]], align 4 -// CHECK3-NEXT: store volatile double [[TMP21]], double* [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP22]], i32* [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load float, float* [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP23]], float* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load double, double* [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP30]], double* [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP31:%.*]] = load double*, double** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load double, double* [[TMP31]], align 4 +// CHECK3-NEXT: store volatile double [[TMP32]], double* [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP33]], i32* [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load float, float* [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP34]], float* [[TMP8]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca float*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -602,103 +646,107 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 -// CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store float* [[SFVAR]], float** [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load float*, float** [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store double* [[TMP1]], double** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load double*, double** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double*, double** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP11]], align 4 +// CHECK3-NEXT: store double* [[TMP8]], double** [[TMP]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load double*, double** [[TMP]], align 4 -// CHECK3-NEXT: store double* [[G13]], double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load double*, double** [[TMP]], align 4 +// CHECK3-NEXT: store double* [[G1]], double** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: store double 1.000000e+00, double* [[G2]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP15]], align 4 -// CHECK3-NEXT: store i32 3, i32* [[SVAR5]], align 4 -// CHECK3-NEXT: store float 4.000000e+00, float* [[SFVAR6]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double* [[G2]], double** [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP18:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: store double* [[TMP18]], double** [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store i32* [[SVAR5]], i32** [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store float* [[SFVAR6]], float** [[TMP20]], align 4 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) +// CHECK3-NEXT: store double 1.000000e+00, double* [[G]], align 8 +// CHECK3-NEXT: [[TMP24:%.*]] = load double*, double** [[_TMP2]], align 4 +// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP24]], align 4 +// CHECK3-NEXT: store i32 3, i32* [[SVAR]], align 4 +// CHECK3-NEXT: store float 4.000000e+00, float* [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G]], double** [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load double*, double** [[_TMP2]], align 4 +// CHECK3-NEXT: store double* [[TMP27]], double** [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SVAR]], i32** [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[SFVAR]], float** [[TMP29]], align 4 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK3-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP24:%.*]] = load double, double* [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP24]], double* [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP25:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load double, double* [[TMP25]], align 4 -// CHECK3-NEXT: store volatile double [[TMP26]], double* [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP27]], i32* [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load float, float* [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP28]], float* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load double, double* [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP33]], double* [[TMP6]], align 8 +// CHECK3-NEXT: [[TMP34:%.*]] = load double*, double** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load double, double* [[TMP34]], align 4 +// CHECK3-NEXT: store volatile double [[TMP35]], double* [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP36]], i32* [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load float, float* [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP37]], float* [[TMP12]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -853,6 +901,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 @@ -864,21 +913,27 @@ // CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK9-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32* [[CONV]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP3]], i32* [[CONV1]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S* [[TMP7]], %struct.S** [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP8]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -886,31 +941,36 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP2:%.*]] = alloca %struct.S*, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK9-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -920,112 +980,122 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK9-NEXT: store %struct.S* [[VAR5]], %struct.S** [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i32*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP14]], i64 [[TMP16]], [2 x i32]* [[VEC3]], i32* [[T_VAR2]], [2 x %struct.S]* [[S_ARR4]], %struct.S* [[TMP17]], i32* [[SVAR7]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK9-NEXT: store i64 [[TMP20]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK9-NEXT: store i64 [[TMP22]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP29:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 8 +// CHECK9-NEXT: store %struct.S* [[TMP29]], %struct.S** [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: store i32* [[SVAR]], i32** [[TMP30]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK9-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP24]], i32* [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK9-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP25]], i8* align 4 [[TMP26]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP27:%.*]] = bitcast [2 x %struct.S]* [[S_ARR4]] to %struct.S* -// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN9]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN9]], [[TMP28]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP37]], i32* [[TMP4]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK9-NEXT: [[TMP39:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP40:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN4]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN4]], [[TMP41]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP27]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[TMP29:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK9-NEXT: [[TMP30:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP29]], i8* align 4 [[TMP30]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP40]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[TMP42:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK9-NEXT: [[TMP43:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP42]], i8* align 4 [[TMP43]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP28]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done10: -// CHECK9-NEXT: [[TMP31:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[TMP5]] to i8* -// CHECK9-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[TMP31]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[SVAR7]], align 4 -// CHECK9-NEXT: store i32 [[TMP34]], i32* [[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP41]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done5: +// CHECK9-NEXT: [[TMP44:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP45:%.*]] = bitcast %struct.S* [[TMP11]] to i8* +// CHECK9-NEXT: [[TMP46:%.*]] = bitcast %struct.S* [[TMP44]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP45]], i8* align 4 [[TMP46]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP47]], i32* [[TMP10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN11]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP35]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP48]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done12: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done7: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1033,39 +1103,43 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca %struct.S*, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32*, i32** [[TMP13]], align 8 +// CHECK9-NEXT: store %struct.S* [[TMP12]], %struct.S** [[TMP]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -1075,108 +1149,108 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store %struct.S* [[VAR6]], %struct.S** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK9-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX11]] to i8* -// CHECK9-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[TMP18]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP26]], i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: [[TMP30:%.*]] = bitcast %struct.S* [[ARRAYIDX6]] to i8* +// CHECK9-NEXT: [[TMP31:%.*]] = bitcast %struct.S* [[TMP28]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK9-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP27]], i32* [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK9-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP30:%.*]] = bitcast [2 x %struct.S]* [[S_ARR5]] to %struct.S* -// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN13]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN13]], [[TMP31]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP37]], i32* [[TMP8]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK9-NEXT: [[TMP39:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP10]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP40:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN8]], [[TMP41]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP30]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN13]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK9-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP40]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[TMP42:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK9-NEXT: [[TMP43:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP42]], i8* align 4 [[TMP43]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP31]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done14: -// CHECK9-NEXT: [[TMP34:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[TMP7]] to i8* -// CHECK9-NEXT: [[TMP36:%.*]] = bitcast %struct.S* [[TMP34]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK9-NEXT: store i32 [[TMP37]], i32* [[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP41]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP44:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP45:%.*]] = bitcast %struct.S* [[TMP17]] to i8* +// CHECK9-NEXT: [[TMP46:%.*]] = bitcast %struct.S* [[TMP44]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP45]], i8* align 4 [[TMP46]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP47]], i32* [[TMP14]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN15:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN15]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP38]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP48]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN15]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE16:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done16: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -1194,35 +1268,35 @@ // CHECK9-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x i8*], align 8 // CHECK9-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x i8*], align 8 // CHECK9-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x i8*], align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) -// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) -// CHECK9-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[TMP1]], %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) +// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) +// CHECK9-NEXT: store %struct.S.1* [[TEST]], %struct.S.1** [[VAR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[TMP1]], %struct.S.1** [[TMP]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_VAR]], align 4 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64* // CHECK9-NEXT: store i64 [[TMP3]], i64* [[TMP8]], align 8 @@ -1240,19 +1314,19 @@ // CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 // CHECK9-NEXT: store i8* null, i8** [[TMP16]], align 8 // CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK9-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.0]** -// CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP18]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.1]** +// CHECK9-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP18]], align 8 // CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK9-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to [2 x %struct.S.0]** -// CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP20]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to [2 x %struct.S.1]** +// CHECK9-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP20]], align 8 // CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 // CHECK9-NEXT: store i8* null, i8** [[TMP21]], align 8 // CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK9-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.0** -// CHECK9-NEXT: store %struct.S.0* [[TMP5]], %struct.S.0** [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.1** +// CHECK9-NEXT: store %struct.S.1* [[TMP5]], %struct.S.1** [[TMP23]], align 8 // CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK9-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to %struct.S.0** -// CHECK9-NEXT: store %struct.S.0* [[TMP6]], %struct.S.0** [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to %struct.S.1** +// CHECK9-NEXT: store %struct.S.1* [[TMP6]], %struct.S.1** [[TMP25]], align 8 // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 // CHECK9-NEXT: store i8* null, i8** [[TMP26]], align 8 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 @@ -1262,21 +1336,21 @@ // CHECK9-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 // CHECK9-NEXT: br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: -// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49(i64 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP4]]) #[[ATTR4]] +// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49(i64 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.1]* [[S_ARR]], %struct.S.1* [[TMP4]]) #[[ATTR4]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: // CHECK9-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK9: arraydestroy.done2: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK9-NEXT: ret i32 [[TMP32]] // @@ -1316,390 +1390,414 @@ // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK9-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) +// CHECK9-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49 -// CHECK9-SAME: (i64 noundef [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i64 noundef [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.1]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.1]*, align 8 +// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 +// CHECK9-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[S_ARR_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[VAR_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32* [[CONV]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP3]]) +// CHECK9-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[S_ARR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[TMP2]], %struct.S.1** [[TMP]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [2 x %struct.S.1]* [[TMP1]], [2 x %struct.S.1]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[TMP7]], %struct.S.1** [[TMP6]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK9-NEXT: [[_TMP2:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP7]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[TMP8]], %struct.S.1** [[TMP]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: -// CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK9-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP9:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK9-NEXT: [[TMP16:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i32*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP13]], i64 [[TMP15]], [2 x i32]* [[VEC3]], i32* [[T_VAR2]], [2 x %struct.S.0]* [[S_ARR4]], %struct.S.0* [[TMP16]]) +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// CHECK9-NEXT: store i64 [[TMP18]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK9-NEXT: store i64 [[TMP20]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP21]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP22]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP27:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP2]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[TMP27]], %struct.S.1** [[TMP26]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP31]]) +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK9-NEXT: br i1 [[TMP33]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP23]], i32* [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK9-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP24]], i8* align 4 [[TMP25]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP26:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR4]] to %struct.S.0* -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN8]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN8]], [[TMP27]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP34]], i32* [[TMP4]], align 4 +// CHECK9-NEXT: [[TMP35:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK9-NEXT: [[TMP36:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP37:%.*]] = bitcast [2 x %struct.S.1]* [[S_ARR]] to %struct.S.1* +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN4]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN4]], [[TMP38]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP26]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[TMP28:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK9-NEXT: [[TMP29:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i64 4, i1 false) -// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done9: -// CHECK9-NEXT: [[TMP30:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP31:%.*]] = bitcast %struct.S.0* [[TMP4]] to i8* -// CHECK9-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[TMP30]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP31]], i8* align 4 [[TMP32]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP37]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[TMP39:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK9-NEXT: [[TMP40:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP39]], i8* align 4 [[TMP40]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP38]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done5: +// CHECK9-NEXT: [[TMP41:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP42:%.*]] = bitcast %struct.S.1* [[TMP9]] to i8* +// CHECK9-NEXT: [[TMP43:%.*]] = bitcast %struct.S.1* [[TMP41]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP42]], i8* align 4 [[TMP43]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN10]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN6]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP33]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done11: +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP44]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done7: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP11]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[TMP12]], %struct.S.1** [[TMP]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: -// CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP6:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store %struct.S.0* [[VAR6]], %struct.S.0** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i64 0, i64 [[IDXPROM9]] -// CHECK9-NEXT: [[TMP19:%.*]] = bitcast %struct.S.0* [[ARRAYIDX10]] to i8* -// CHECK9-NEXT: [[TMP20:%.*]] = bitcast %struct.S.0* [[TMP17]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP24]], i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: [[TMP28:%.*]] = bitcast %struct.S.1* [[ARRAYIDX6]] to i8* +// CHECK9-NEXT: [[TMP29:%.*]] = bitcast %struct.S.1* [[TMP26]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK9-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK9-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP26]], i32* [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK9-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP27]], i8* align 4 [[TMP28]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP29:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR5]] to %struct.S.0* -// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN12]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN12]], [[TMP30]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP35]], i32* [[TMP8]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK9-NEXT: [[TMP37:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP36]], i8* align 4 [[TMP37]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP10]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP38:%.*]] = bitcast [2 x %struct.S.1]* [[S_ARR]] to %struct.S.1* +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN8]], [[TMP39]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP29]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[TMP31:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK9-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP31]], i8* align 4 [[TMP32]], i64 4, i1 false) -// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP30]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done13: -// CHECK9-NEXT: [[TMP33:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP34:%.*]] = bitcast %struct.S.0* [[TMP6]] to i8* -// CHECK9-NEXT: [[TMP35:%.*]] = bitcast %struct.S.0* [[TMP33]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP38]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[TMP40:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK9-NEXT: [[TMP41:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP40]], i8* align 4 [[TMP41]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP39]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP42:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP43:%.*]] = bitcast %struct.S.1* [[TMP15]] to i8* +// CHECK9-NEXT: [[TMP44:%.*]] = bitcast %struct.S.1* [[TMP42]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP43]], i8* align 4 [[TMP44]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN14]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP36]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done15: +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP45]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: store i32 0, i32* [[F]], align 4 // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1850,6 +1948,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK11-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 @@ -1859,21 +1958,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 // CHECK11-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32* [[T_VAR_ADDR]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP3]], i32* [[SVAR_ADDR]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[T_VAR_ADDR]], i32** [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP7]], %struct.S** [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32* [[SVAR_ADDR]], i32** [[TMP8]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1881,31 +1986,36 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca %struct.S*, align 4 -// CHECK11-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca %struct.S*, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1915,110 +2025,120 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store %struct.S* [[VAR5]], %struct.S** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP13]], i32 [[TMP14]], [2 x i32]* [[VEC3]], i32* [[T_VAR2]], [2 x %struct.S]* [[S_ARR4]], %struct.S* [[TMP15]], i32* [[SVAR7]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP21]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP27:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP27]], %struct.S** [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store i32* [[SVAR]], i32** [[TMP28]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK11-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP22]], i32* [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK11-NEXT: [[TMP24:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP23]], i8* align 4 [[TMP24]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP25:%.*]] = bitcast [2 x %struct.S]* [[S_ARR4]] to %struct.S* -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN9]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN9]], [[TMP26]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP35]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK11-NEXT: [[TMP37:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 [[TMP37]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP38:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN4]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN4]], [[TMP39]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP25]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[TMP27:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK11-NEXT: [[TMP28:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP27]], i8* align 4 [[TMP28]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP38]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[TMP40:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK11-NEXT: [[TMP41:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP40]], i8* align 4 [[TMP41]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP26]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done10: -// CHECK11-NEXT: [[TMP29:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = bitcast %struct.S* [[TMP5]] to i8* -// CHECK11-NEXT: [[TMP31:%.*]] = bitcast %struct.S* [[TMP29]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[SVAR7]], align 4 -// CHECK11-NEXT: store i32 [[TMP32]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP39]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done5: +// CHECK11-NEXT: [[TMP42:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = bitcast %struct.S* [[TMP11]] to i8* +// CHECK11-NEXT: [[TMP44:%.*]] = bitcast %struct.S* [[TMP42]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP43]], i8* align 4 [[TMP44]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP45]], i32* [[TMP10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN11]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP33]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP46]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done12: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done7: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2026,37 +2146,41 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca %struct.S*, align 4 -// CHECK11-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca %struct.S*, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[TMP13]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP12]], %struct.S** [[TMP]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -2066,106 +2190,106 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store %struct.S* [[VAR5]], %struct.S** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC3]], i32 0, i32 [[TMP17]] -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 [[TMP19]] -// CHECK11-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX9]] to i8* -// CHECK11-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[TMP18]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP27]] +// CHECK11-NEXT: store i32 [[TMP26]], i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP29]] +// CHECK11-NEXT: [[TMP30:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* +// CHECK11-NEXT: [[TMP31:%.*]] = bitcast %struct.S* [[TMP28]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK11-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK11-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP27]], i32* [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK11-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP30:%.*]] = bitcast [2 x %struct.S]* [[S_ARR4]] to %struct.S* -// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN11]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN11]], [[TMP31]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP37]], i32* [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK11-NEXT: [[TMP39:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP10]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP40:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN6]], [[TMP41]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP30]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK11-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP40]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[TMP42:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK11-NEXT: [[TMP43:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP42]], i8* align 4 [[TMP43]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP31]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done12: -// CHECK11-NEXT: [[TMP34:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[TMP7]] to i8* -// CHECK11-NEXT: [[TMP36:%.*]] = bitcast %struct.S* [[TMP34]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[SVAR7]], align 4 -// CHECK11-NEXT: store i32 [[TMP37]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP41]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP44:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP45:%.*]] = bitcast %struct.S* [[TMP17]] to i8* +// CHECK11-NEXT: [[TMP46:%.*]] = bitcast %struct.S* [[TMP44]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP45]], i8* align 4 [[TMP46]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP47:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP47]], i32* [[TMP14]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP38]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP48]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // @@ -2183,34 +2307,34 @@ // CHECK11-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x i8*], align 4 // CHECK11-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x i8*], align 4 // CHECK11-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x i8*], align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i32 8, i1 false) -// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i32 1 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) -// CHECK11-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[TMP1]], %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i32 1 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK11-NEXT: store %struct.S.1* [[TEST]], %struct.S.1** [[VAR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[TMP1]], %struct.S.1** [[TMP]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_VAR]], align 4 // CHECK11-NEXT: store i32 [[TMP2]], i32* [[T_VAR_CASTED]], align 4 // CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32* // CHECK11-NEXT: store i32 [[TMP3]], i32* [[TMP8]], align 4 @@ -2228,19 +2352,19 @@ // CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 // CHECK11-NEXT: store i8* null, i8** [[TMP16]], align 4 // CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.0]** -// CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP18]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.1]** +// CHECK11-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP18]], align 4 // CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to [2 x %struct.S.0]** -// CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP20]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to [2 x %struct.S.1]** +// CHECK11-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP20]], align 4 // CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 // CHECK11-NEXT: store i8* null, i8** [[TMP21]], align 4 // CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK11-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.0** -// CHECK11-NEXT: store %struct.S.0* [[TMP5]], %struct.S.0** [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.1** +// CHECK11-NEXT: store %struct.S.1* [[TMP5]], %struct.S.1** [[TMP23]], align 4 // CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK11-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to %struct.S.0** -// CHECK11-NEXT: store %struct.S.0* [[TMP6]], %struct.S.0** [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to %struct.S.1** +// CHECK11-NEXT: store %struct.S.1* [[TMP6]], %struct.S.1** [[TMP25]], align 4 // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 // CHECK11-NEXT: store i8* null, i8** [[TMP26]], align 4 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 @@ -2250,21 +2374,21 @@ // CHECK11-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 // CHECK11-NEXT: br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: -// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49(i32 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP4]]) #[[ATTR4]] +// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49(i32 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.1]* [[S_ARR]], %struct.S.1* [[TMP4]]) #[[ATTR4]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: // CHECK11-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK11: arraydestroy.done2: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK11-NEXT: ret i32 [[TMP32]] // @@ -2304,383 +2428,407 @@ // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK11-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49 -// CHECK11-SAME: (i32 noundef [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32 noundef [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.1]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.1]*, align 4 +// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 +// CHECK11-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[S_ARR_ADDR]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[VAR_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32* [[T_VAR_ADDR]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP3]]) +// CHECK11-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[S_ARR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR_ADDR]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[TMP2]], %struct.S.1** [[TMP]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[T_VAR_ADDR]], i32** [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x %struct.S.1]* [[TMP1]], [2 x %struct.S.1]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[TMP7]], %struct.S.1** [[TMP6]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP7]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[TMP8]], %struct.S.1** [[TMP]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: -// CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i32 1 -// CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP9:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP12]], i32 [[TMP13]], [2 x i32]* [[VEC3]], i32* [[T_VAR2]], [2 x %struct.S.0]* [[S_ARR4]], %struct.S.0* [[TMP14]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP19]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP20]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP21]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP25:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP2]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[TMP25]], %struct.S.1** [[TMP24]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP29]]) +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP21]], i32* [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK11-NEXT: [[TMP23:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP24:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR4]] to %struct.S.0* -// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN8]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN8]], [[TMP25]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP32]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK11-NEXT: [[TMP34:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP35:%.*]] = bitcast [2 x %struct.S.1]* [[S_ARR]] to %struct.S.1* +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN4]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN4]], [[TMP36]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP24]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[TMP26:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK11-NEXT: [[TMP27:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 4, i1 false) -// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP25]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done9: -// CHECK11-NEXT: [[TMP28:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = bitcast %struct.S.0* [[TMP4]] to i8* -// CHECK11-NEXT: [[TMP30:%.*]] = bitcast %struct.S.0* [[TMP28]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP29]], i8* align 4 [[TMP30]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP35]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[TMP37:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK11-NEXT: [[TMP38:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP37]], i8* align 4 [[TMP38]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done5: +// CHECK11-NEXT: [[TMP39:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = bitcast %struct.S.1* [[TMP9]] to i8* +// CHECK11-NEXT: [[TMP41:%.*]] = bitcast %struct.S.1* [[TMP39]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP40]], i8* align 4 [[TMP41]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN10]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done11: +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP42]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done7: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP11]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[TMP12]], %struct.S.1** [[TMP]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: -// CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i32 1 -// CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP6:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC3]], i32 0, i32 [[TMP16]] -// CHECK11-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 [[TMP18]] -// CHECK11-NEXT: [[TMP19:%.*]] = bitcast %struct.S.0* [[ARRAYIDX8]] to i8* -// CHECK11-NEXT: [[TMP20:%.*]] = bitcast %struct.S.0* [[TMP17]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP25]] +// CHECK11-NEXT: store i32 [[TMP24]], i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 [[TMP27]] +// CHECK11-NEXT: [[TMP28:%.*]] = bitcast %struct.S.1* [[ARRAYIDX4]] to i8* +// CHECK11-NEXT: [[TMP29:%.*]] = bitcast %struct.S.1* [[TMP26]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK11-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP26]], i32* [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK11-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP27]], i8* align 4 [[TMP28]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP29:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR4]] to %struct.S.0* -// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN10]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN10]], [[TMP30]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP35]], i32* [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK11-NEXT: [[TMP37:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 [[TMP37]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP10]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP38:%.*]] = bitcast [2 x %struct.S.1]* [[S_ARR]] to %struct.S.1* +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN6]], [[TMP39]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP29]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[TMP31:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK11-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP31]], i8* align 4 [[TMP32]], i32 4, i1 false) -// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP30]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done11: -// CHECK11-NEXT: [[TMP33:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = bitcast %struct.S.0* [[TMP6]] to i8* -// CHECK11-NEXT: [[TMP35:%.*]] = bitcast %struct.S.0* [[TMP33]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP38]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[TMP40:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK11-NEXT: [[TMP41:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP40]], i8* align 4 [[TMP41]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP39]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP42:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = bitcast %struct.S.1* [[TMP15]] to i8* +// CHECK11-NEXT: [[TMP44:%.*]] = bitcast %struct.S.1* [[TMP42]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP43]], i8* align 4 [[TMP44]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP36]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP45]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: store i32 0, i32* [[F]], align 4 // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp @@ -187,78 +187,90 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l50 // CHECK1-SAME: () #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 2) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 2) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -268,43 +280,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: invoke void @_Z3foov() @@ -314,20 +330,20 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR9:[0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR9:[0-9]+]] // CHECK1-NEXT: unreachable // // @@ -343,92 +359,103 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* // CHECK1-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: store i8 [[TMP0]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* -// CHECK1-NEXT: store i8 [[TMP1]], i8* [[CONV1]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP2]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: store i8 [[TMP2]], i8* [[TMP1]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK1-NEXT: store i8 [[TMP2]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK1-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]) +// CHECK1-NEXT: [[TMP10:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP17]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -438,43 +465,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: invoke void @_Z3foov() @@ -484,20 +515,20 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR9]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR9]] // CHECK1-NEXT: unreachable // // @@ -601,78 +632,90 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l36 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 5) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 5) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -682,43 +725,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: invoke void @_Z3foov() @@ -728,98 +775,110 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR9]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR9]] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l40 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 23) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 23) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -829,43 +888,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: invoke void @_Z3foov() @@ -875,98 +938,110 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR9]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR9]] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l36 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -976,43 +1051,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: invoke void @_Z3foov() @@ -1022,20 +1101,20 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR9]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR9]] // CHECK1-NEXT: unreachable // // @@ -1044,18 +1123,17 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 // CHECK1-NEXT: invoke void @_ZN1SC1El(%struct.S* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 noundef 23) // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: // CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i8 @_ZN1ScvcEv(%struct.S* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK1-NEXT: call void @_ZN1SD1Ev(%struct.S* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]] // CHECK1-NEXT: store i8 [[CALL]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP0:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* -// CHECK1-NEXT: store i8 [[TMP0]], i8* [[CONV]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: store i8 [[TMP1]], i8* [[TMP0]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: // CHECK1-NEXT: [[TMP2:%.*]] = landingpad { i8*, i32 } @@ -1066,78 +1144,90 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK1-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK1-NEXT: store i8 [[TMP2]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK1-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]) +// CHECK1-NEXT: [[TMP10:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP17]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1147,43 +1237,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: invoke void @_Z3foov() @@ -1193,20 +1287,20 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR9]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR9]] // CHECK1-NEXT: unreachable // // @@ -1331,78 +1425,90 @@ // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l50 // CHECK5-SAME: () #[[ATTR3:[0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 2) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 2) +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1412,43 +1518,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -1458,20 +1568,20 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR9:[0-9]+]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR9:[0-9]+]] // CHECK5-NEXT: unreachable // // @@ -1487,92 +1597,103 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* // CHECK5-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK5-NEXT: store i8 [[TMP0]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* -// CHECK5-NEXT: store i8 [[TMP1]], i8* [[CONV1]], align 1 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP2]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: store i8 [[TMP2]], i8* [[TMP1]], align 1 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK5-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK5-NEXT: store i8 [[TMP2]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK5-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]) +// CHECK5-NEXT: [[TMP10:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]) +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK5-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP17]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1582,43 +1703,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -1628,20 +1753,20 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR9]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR9]] // CHECK5-NEXT: unreachable // // @@ -1736,78 +1861,90 @@ // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l36 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 5) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 5) +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1817,43 +1954,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -1863,98 +2004,110 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR9]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR9]] // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l40 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*)) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 23) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 23) +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1964,43 +2117,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -2010,98 +2167,110 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR9]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR9]] // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l36 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*)) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 1) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1) +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2111,43 +2280,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -2157,20 +2330,20 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR9]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR9]] // CHECK5-NEXT: unreachable // // @@ -2179,18 +2352,17 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 // CHECK5-NEXT: invoke void @_ZN1SC1El(%struct.S* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 noundef 23) // CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK5: invoke.cont: // CHECK5-NEXT: [[CALL:%.*]] = call noundef signext i8 @_ZN1ScvcEv(%struct.S* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK5-NEXT: call void @_ZN1SD1Ev(%struct.S* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]] // CHECK5-NEXT: store i8 [[CALL]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[TMP0:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* -// CHECK5-NEXT: store i8 [[TMP0]], i8* [[CONV]], align 1 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: store i8 [[TMP1]], i8* [[TMP0]], align 1 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: // CHECK5-NEXT: [[TMP2:%.*]] = landingpad { i8*, i32 } @@ -2201,78 +2373,90 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 +// CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK5-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK5-NEXT: store i8 [[TMP2]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK5-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]) +// CHECK5-NEXT: [[TMP10:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]) +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK5-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP17]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2282,43 +2466,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -2328,20 +2516,20 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { i8*, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { i8*, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP12]]) #[[ATTR9]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { i8*, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP17]]) #[[ATTR9]] // CHECK5-NEXT: unreachable // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp @@ -276,29 +276,36 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 @@ -314,55 +321,61 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i64 2 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -372,12 +385,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -392,14 +404,18 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 @@ -415,70 +431,70 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM3]] -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* +// CHECK1-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD5]], i32* [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP20]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP25]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] @@ -491,23 +507,23 @@ // CHECK1-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) -// CHECK1-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 8 -// CHECK1-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) +// CHECK1-NEXT: store %struct.S.1* [[TEST]], %struct.S.1** [[VAR]], align 8 +// CHECK1-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 8 // CHECK1-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 2) // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0) // CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 @@ -517,292 +533,308 @@ // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done2: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK1-NEXT: ret i32 [[TMP4]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) +// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK1-SAME: () #[[ATTR4]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[_TMP2:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: -// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN4]], i64 2 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN4]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done5: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[_TMP3:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: -// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[ARRAYIDX6]] to i8* -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i64 4, i1 false) +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: [[TMP19:%.*]] = bitcast %struct.S.1* [[ARRAYIDX6]] to i8* +// CHECK1-NEXT: [[TMP20:%.*]] = bitcast %struct.S.1* [[TMP17]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN8]], i64 2 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN8]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP19]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP24]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done9: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load volatile i32, i32* @g, align 4 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load volatile i32, i32* @g, align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -811,11 +843,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -969,29 +1001,36 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96 // CHECK3-SAME: () #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 @@ -1007,53 +1046,59 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP11]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]]) // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i32 2 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -1063,12 +1108,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1083,14 +1127,18 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 @@ -1104,68 +1152,68 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP12]] -// CHECK3-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[ARRAYIDX2]] to i8* -// CHECK3-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 4, i1 false) -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP17]] +// CHECK3-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX2]] to i8* +// CHECK3-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] // CHECK3-NEXT: store i32 [[ADD3]], i32* [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN5]], i32 2 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN5]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP20]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP25]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] @@ -1178,23 +1226,23 @@ // CHECK3-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK3-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i32 8, i1 false) -// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i32 1 -// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) -// CHECK3-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 4 -// CHECK3-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 4 +// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i32 1 +// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK3-NEXT: store %struct.S.1* [[TEST]], %struct.S.1** [[VAR]], align 4 +// CHECK3-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 4 // CHECK3-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 2) // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0) // CHECK3-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 @@ -1204,286 +1252,302 @@ // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK3: arraydestroy.done2: -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK3-NEXT: ret i32 [[TMP4]] // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK3-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK3-SAME: () #[[ATTR4]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[_TMP2:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 4 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK3: arrayctor.loop: -// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i32 1 -// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i32 1 +// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: -// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK3-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP11]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN4]], i32 2 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]]) +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN4]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK3: arraydestroy.done5: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[_TMP2:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 4 +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK3: arrayctor.loop: -// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i32 1 -// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i32 1 +// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: -// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK3-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[ARRAYIDX4]] to i8* -// CHECK3-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 [[TMP18]] +// CHECK3-NEXT: [[TMP19:%.*]] = bitcast %struct.S.1* [[ARRAYIDX4]] to i8* +// CHECK3-NEXT: [[TMP20:%.*]] = bitcast %struct.S.1* [[TMP17]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i32 4, i1 false) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN6]], i32 2 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP19]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP24]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK3: arraydestroy.done7: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load volatile i32, i32* @g, align 4 // CHECK3-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load volatile i32, i32* @g, align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1492,11 +1556,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -1644,18 +1708,20 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK9-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 @@ -1663,68 +1729,78 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[G:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[G1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32* undef, i32** [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK9-NEXT: store i32* [[G1]], i32** [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 @@ -1737,74 +1813,78 @@ // CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK9-NEXT: store i32* undef, i32** [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK9-NEXT: store i32* [[G1]], i32** [[_TMP3]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK9-NEXT: store i32 1, i32* [[G]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK9-NEXT: store volatile i32 1, i32* [[TMP10]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK9-NEXT: store volatile i32 1, i32* [[TMP15]], align 4 // CHECK9-NEXT: store i32 2, i32* [[SIVAR]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store i32* [[G]], i32** [[TMP11]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP3]], align 8 -// CHECK9-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[TMP14]], align 8 -// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[G]], i32** [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32*, i32** [[_TMP3]], align 8 +// CHECK9-NEXT: store i32* [[TMP18]], i32** [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[TMP19]], align 8 +// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK9-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp @@ -80,78 +80,90 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l36 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 4) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 4) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -161,135 +173,151 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l39 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 3) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 3) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -299,57 +327,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -371,78 +403,90 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l29 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 2) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 2) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -452,57 +496,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp @@ -133,195 +133,213 @@ // CHECK1-SAME: (i64 noundef [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[SIVAR1]], align 4 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i32* [[SIVAR1]]) +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[SIVAR]], i32** [[TMP16]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i32* [[SIVAR1]] to i8* -// CHECK1-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP25]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SIVAR2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[SIVAR2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 0, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[SIVAR2]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[SIVAR2]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i32* [[SIVAR2]] to i8* -// CHECK1-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, i8* [[TMP22]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR2]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD6]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[SIVAR2]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = atomicrmw add i32* [[TMP6]], i32 [[TMP26]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -417,195 +435,213 @@ // CHECK1-SAME: (i64 noundef [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[T_VAR1]], align 4 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i32* [[T_VAR1]]) +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[TMP16]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i32* [[T_VAR1]] to i8* -// CHECK1-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i32* [[T_VAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP25]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[T_VAR2]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[T_VAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i32* [[T_VAR2]] to i8* -// CHECK1-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.5, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[T_VAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, i8* [[TMP22]], void (i8*, i8*)* @.omp.reduction.reduction_func.5, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD6]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = atomicrmw add i32* [[TMP6]], i32 [[TMP26]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -705,190 +741,208 @@ // CHECK3-SAME: (i32 noundef [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[SIVAR]], i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[SIVAR_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[SIVAR_ADDR]], i32** [[TMP0]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[SIVAR1]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], i32* [[SIVAR1]]) +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[TMP14]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = bitcast i32* [[SIVAR1]] to i8* -// CHECK3-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, i8* [[TMP14]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP18:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK3-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i32 4, i8* [[TMP19]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP23]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: store i32 0, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[SIVAR1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = bitcast i32* [[SIVAR1]] to i8* -// CHECK3-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP21:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK3-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], i32 1, i32 4, i8* [[TMP22]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[TMP6]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = atomicrmw add i32* [[TMP6]], i32 [[TMP26]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -983,190 +1037,208 @@ // CHECK3-SAME: (i32 noundef [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* [[T_VAR_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[T_VAR_ADDR]], i32** [[TMP0]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[T_VAR1]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], i32* [[T_VAR1]]) +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[T_VAR]], i32** [[TMP14]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = bitcast i32* [[T_VAR1]] to i8* -// CHECK3-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 4, i8* [[TMP14]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP18:%.*]] = bitcast i32* [[T_VAR]] to i8* +// CHECK3-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, i8* [[TMP19]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP23]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: store i32 0, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[T_VAR1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[T_VAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = bitcast i32* [[T_VAR1]] to i8* -// CHECK3-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.5, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP21:%.*]] = bitcast i32* [[T_VAR]] to i8* +// CHECK3-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], i32 1, i32 4, i8* [[TMP22]], void (i8*, i8*)* @.omp.reduction.reduction_func.5, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[TMP6]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = atomicrmw add i32* [[TMP6]], i32 [[TMP26]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -1241,199 +1313,217 @@ // CHECK9-SAME: (i64 noundef [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[SIVAR1]], align 4 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[SIVAR]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i32* [[SIVAR1]]) +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[TMP16]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK9-NEXT: [[TMP15:%.*]] = bitcast i32* [[SIVAR1]] to i8* -// CHECK9-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK9-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP20:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK9-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK9-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: store i32 [[ADD2]], i32* [[TMP2]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.case2: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP25]] monotonic, align 4 // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.default: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[SIVAR2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: store i32 0, i32* [[SIVAR2]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK9-NEXT: store i32 0, i32* [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[SIVAR2]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[SIVAR2]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store i32* [[SIVAR2]], i32** [[TMP13]], align 8 -// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[TMP19]], align 8 +// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK9-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK9-NEXT: [[TMP16:%.*]] = bitcast i32* [[SIVAR2]] to i8* -// CHECK9-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP17:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK9-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP17]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP22:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK9-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK9-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, i8* [[TMP23]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[SIVAR2]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK9-NEXT: store i32 [[ADD6]], i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: store i32 [[ADD5]], i32* [[TMP6]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.case2: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR2]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP21]] monotonic, align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = atomicrmw add i32* [[TMP6]], i32 [[TMP27]] monotonic, align 4 // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.default: // CHECK9-NEXT: ret void diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp @@ -58,265 +58,284 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 // CHECK1-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8**)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]], i8** [[TMP0]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: store i8** [[TMP2]], i8*** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i8** noundef [[ARGV:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[_TMP12:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[_TMP11:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x i8*], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP27:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP26:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP1]], i64 0 -// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8*, i8** [[TMP5]], i64 9 -// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint i8* [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint i8* [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = add nuw i64 [[TMP10]], 1 -// CHECK1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = call i8* @llvm.stacksave() -// CHECK1-NEXT: store i8* [[TMP13]], i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP11]], align 16 -// CHECK1-NEXT: store i64 [[TMP11]], i64* [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[VLA]], [[TMP14]] +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: store i32 0, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP4]], i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8*, i8** [[TMP8]], i64 9 +// CHECK1-NEXT: [[TMP9:%.*]] = load i8*, i8** [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP9]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint i8* [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint i8* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP14:%.*]] = add nuw i64 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP16:%.*]] = call i8* @llvm.stacksave() +// CHECK1-NEXT: store i8* [[TMP16]], i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP14]], align 16 +// CHECK1-NEXT: store i64 [[TMP14]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[VLA]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP15:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint i8* [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint i8* [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP20]] -// CHECK1-NEXT: store i8** [[_TMP5]], i8*** [[TMP]], align 8 -// CHECK1-NEXT: store i8* [[TMP21]], i8** [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint i8* [[TMP19]] to i64 +// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint i8* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP22:%.*]] = sub i64 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP23:%.*]] = sdiv exact i64 [[TMP22]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP23]] +// CHECK1-NEXT: store i8** [[_TMP4]], i8*** [[TMP]], align 8 +// CHECK1-NEXT: store i8* [[TMP24]], i8** [[_TMP4]], align 8 // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i32* [[ARGC1]] to i8* -// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP25:%.*]] = bitcast i32* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, i64* [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init. to i8*), i8** [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store i8* null, i8** [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb. to i8*), i8** [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: [[TMP31:%.*]] = bitcast i32* [[TMP30]] to i8* -// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP31]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP33:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8*, i8** [[TMP33]], i64 0 -// CHECK1-NEXT: [[TMP34:%.*]] = load i8*, i8** [[ARRAYIDX7]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, i8* [[TMP34]], i64 0 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = sext i32 [[TMP35]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP36]] -// CHECK1-NEXT: [[TMP37:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i8*, i8** [[TMP37]], i64 9 -// CHECK1-NEXT: [[TMP38:%.*]] = load i8*, i8** [[ARRAYIDX10]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, i8* [[TMP38]], i64 [[LB_ADD_LEN9]] -// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 -// CHECK1-NEXT: store i8* [[ARRAYIDX8]], i8** [[TMP39]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint i8* [[ARRAYIDX11]] to i64 -// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint i8* [[ARRAYIDX8]] to i64 -// CHECK1-NEXT: [[TMP42:%.*]] = sub i64 [[TMP40]], [[TMP41]] -// CHECK1-NEXT: [[TMP43:%.*]] = sdiv exact i64 [[TMP42]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP44:%.*]] = add nuw i64 [[TMP43]], 1 -// CHECK1-NEXT: [[TMP45:%.*]] = mul nuw i64 [[TMP44]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP45]], i64* [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init..1 to i8*), i8** [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 -// CHECK1-NEXT: store i8* null, i8** [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb..2 to i8*), i8** [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 -// CHECK1-NEXT: store i32 1, i32* [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[TMP51]], align 4 -// CHECK1-NEXT: [[TMP53:%.*]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]] to i8* -// CHECK1-NEXT: [[TMP54:%.*]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @[[GLOB1]], i32 [[TMP52]], i32 1, i32 2, i8* [[TMP53]]) -// CHECK1-NEXT: store i8* [[TMP54]], i8** [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i32* [[ARGC]] to i8* +// CHECK1-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP28:%.*]] = bitcast i32* [[TMP2]] to i8* +// CHECK1-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, i64* [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init. to i8*), i8** [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store i8* null, i8** [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb. to i8*), i8** [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to i8* +// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP34]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP36:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP36]], i64 0 +// CHECK1-NEXT: [[TMP37:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP37]], i64 0 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = sext i32 [[TMP38]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN8:%.*]] = add nsw i64 -1, [[TMP39]] +// CHECK1-NEXT: [[TMP40:%.*]] = load i8**, i8*** [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8*, i8** [[TMP40]], i64 9 +// CHECK1-NEXT: [[TMP41:%.*]] = load i8*, i8** [[ARRAYIDX9]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i8, i8* [[TMP41]], i64 [[LB_ADD_LEN8]] +// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 1 +// CHECK1-NEXT: store i8* [[ARRAYIDX7]], i8** [[TMP42]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = ptrtoint i8* [[ARRAYIDX10]] to i64 +// CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint i8* [[ARRAYIDX7]] to i64 +// CHECK1-NEXT: [[TMP45:%.*]] = sub i64 [[TMP43]], [[TMP44]] +// CHECK1-NEXT: [[TMP46:%.*]] = sdiv exact i64 [[TMP45]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP47:%.*]] = add nuw i64 [[TMP46]], 1 +// CHECK1-NEXT: [[TMP48:%.*]] = mul nuw i64 [[TMP47]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP48]], i64* [[TMP49]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 3 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init..1 to i8*), i8** [[TMP50]], align 8 +// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 4 +// CHECK1-NEXT: store i8* null, i8** [[TMP51]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 5 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb..2 to i8*), i8** [[TMP52]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_5]], i32 0, i32 6 +// CHECK1-NEXT: store i32 1, i32* [[TMP53]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, i32* [[TMP54]], align 4 +// CHECK1-NEXT: [[TMP56:%.*]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]] to i8* +// CHECK1-NEXT: [[TMP57:%.*]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @[[GLOB1]], i32 [[TMP55]], i32 1, i32 2, i8* [[TMP56]]) +// CHECK1-NEXT: store i8* [[TMP57]], i8** [[DOTTASK_RED_]], align 8 // CHECK1-NEXT: store i64 0, i64* [[DOTOMP_COMB_LB]], align 8 // CHECK1-NEXT: store i64 9, i64* [[DOTOMP_COMB_UB]], align 8 // CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP55:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP56]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP57:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP57]], 9 +// CHECK1-NEXT: [[TMP58:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = load i32, i32* [[TMP58]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP59]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP60:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP60]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP58:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP58]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP61]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP59]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP62]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP60:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 [[TMP60]], [[TMP61]] -// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP63:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP63]], [[TMP64]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP62:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = load i8**, i8*** [[TMP]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i8**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP62]], i64 [[TMP63]], i32* [[ARGC1]], i8** [[TMP64]]) +// CHECK1-NEXT: [[TMP65:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP65]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP66:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP66]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP67]], align 8 +// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP68]], align 8 +// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[ARGC]], i32** [[TMP69]], align 8 +// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP71:%.*]] = load i8**, i8*** [[TMP]], align 8 +// CHECK1-NEXT: store i8** [[TMP71]], i8*** [[TMP70]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP65:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP66:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP65]], [[TMP66]] +// CHECK1-NEXT: [[TMP72:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP73:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP72]], [[TMP73]] // CHECK1-NEXT: store i64 [[ADD]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP67:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP68:%.*]] = load i32, i32* [[TMP67]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP68]]) -// CHECK1-NEXT: [[TMP69:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP70:%.*]] = load i32, i32* [[TMP69]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP70]], i32 1) -// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP72:%.*]] = bitcast i32* [[ARGC1]] to i8* -// CHECK1-NEXT: store i8* [[TMP72]], i8** [[TMP71]], align 8 -// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP73]], align 8 -// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP75:%.*]] = inttoptr i64 [[TMP11]] to i8* -// CHECK1-NEXT: store i8* [[TMP75]], i8** [[TMP74]], align 8 +// CHECK1-NEXT: [[TMP74:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP75:%.*]] = load i32, i32* [[TMP74]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP75]]) // CHECK1-NEXT: [[TMP76:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP77:%.*]] = load i32, i32* [[TMP76]], align 4 -// CHECK1-NEXT: [[TMP78:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP79:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP77]], i32 2, i64 24, i8* [[TMP78]], void (i8*, i8*)* @.omp.reduction.reduction_func.9, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP79]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP77]], i32 1) +// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP79:%.*]] = bitcast i32* [[ARGC]] to i8* +// CHECK1-NEXT: store i8* [[TMP79]], i8** [[TMP78]], align 8 +// CHECK1-NEXT: [[TMP80:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP80]], align 8 +// CHECK1-NEXT: [[TMP81:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP82:%.*]] = inttoptr i64 [[TMP14]] to i8* +// CHECK1-NEXT: store i8* [[TMP82]], i8** [[TMP81]], align 8 +// CHECK1-NEXT: [[TMP83:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP84:%.*]] = load i32, i32* [[TMP83]], align 4 +// CHECK1-NEXT: [[TMP85:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP86:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP84]], i32 2, i64 24, i8* [[TMP85]], void (i8*, i8*)* @.omp.reduction.reduction_func.9, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP86]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP80:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP81:%.*]] = load i32, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP80]], [[TMP81]] -// CHECK1-NEXT: store i32 [[ADD14]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP82:%.*]] = getelementptr i8, i8* [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[ARRAYIDX2]], [[TMP82]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP87:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP88:%.*]] = load i32, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP87]], [[TMP88]] +// CHECK1-NEXT: store i32 [[ADD13]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP89:%.*]] = getelementptr i8, i8* [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[ARRAYIDX1]], [[TMP89]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE20:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi i8* [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP83:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP83]] to i32 -// CHECK1-NEXT: [[TMP84:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV16:%.*]] = sext i8 [[TMP84]] to i32 -// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV]], [[CONV16]] -// CHECK1-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 -// CHECK1-NEXT: store i8 [[CONV18]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi i8* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP90:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST14]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP90]] to i32 +// CHECK1-NEXT: [[TMP91:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV15:%.*]] = sext i8 [[TMP91]] to i32 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV]], [[CONV15]] +// CHECK1-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 +// CHECK1-NEXT: store i8 [[CONV17]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST14]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT18]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP82]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done21: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB4]], i32 [[TMP77]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE19:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT18]], [[TMP89]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done20: +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB4]], i32 [[TMP84]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP85:%.*]] = load i32, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP86:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP85]] monotonic, align 4 -// CHECK1-NEXT: [[TMP87:%.*]] = getelementptr i8, i8* [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY22:%.*]] = icmp eq i8* [[ARRAYIDX2]], [[TMP87]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY22]], label [[OMP_ARRAYCPY_DONE35:%.*]], label [[OMP_ARRAYCPY_BODY23:%.*]] -// CHECK1: omp.arraycpy.body23: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST24:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT33:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST25:%.*]] = phi i8* [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP88:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 -// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP88]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST25]] monotonic, align 1 +// CHECK1-NEXT: [[TMP92:%.*]] = load i32, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP93:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP92]] monotonic, align 4 +// CHECK1-NEXT: [[TMP94:%.*]] = getelementptr i8, i8* [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY21:%.*]] = icmp eq i8* [[ARRAYIDX1]], [[TMP94]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY21]], label [[OMP_ARRAYCPY_DONE34:%.*]], label [[OMP_ARRAYCPY_BODY22:%.*]] +// CHECK1: omp.arraycpy.body22: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST23:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT32:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST24:%.*]] = phi i8* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT31:%.*]], [[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[TMP95:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST23]], align 1 +// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP95]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST24]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP89:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY23]] ], [ [[TMP94:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP89]], i8* [[_TMP27]], align 1 -// CHECK1-NEXT: [[TMP90:%.*]] = load i8, i8* [[_TMP27]], align 1 -// CHECK1-NEXT: [[CONV28:%.*]] = sext i8 [[TMP90]] to i32 -// CHECK1-NEXT: [[TMP91:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 -// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP91]] to i32 -// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV28]], [[CONV29]] -// CHECK1-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 -// CHECK1-NEXT: store i8 [[CONV31]], i8* [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP92:%.*]] = load i8, i8* [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP93:%.*]] = cmpxchg i8* [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i8 [[TMP89]], i8 [[TMP92]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP94]] = extractvalue { i8, i1 } [[TMP93]], 0 -// CHECK1-NEXT: [[TMP95:%.*]] = extractvalue { i8, i1 } [[TMP93]], 1 -// CHECK1-NEXT: br i1 [[TMP95]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP96:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY22]] ], [ [[TMP101:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP96]], i8* [[_TMP26]], align 1 +// CHECK1-NEXT: [[TMP97:%.*]] = load i8, i8* [[_TMP26]], align 1 +// CHECK1-NEXT: [[CONV27:%.*]] = sext i8 [[TMP97]] to i32 +// CHECK1-NEXT: [[TMP98:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST23]], align 1 +// CHECK1-NEXT: [[CONV28:%.*]] = sext i8 [[TMP98]] to i32 +// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[CONV27]], [[CONV28]] +// CHECK1-NEXT: [[CONV30:%.*]] = trunc i32 [[ADD29]] to i8 +// CHECK1-NEXT: store i8 [[CONV30]], i8* [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP99:%.*]] = load i8, i8* [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP100:%.*]] = cmpxchg i8* [[OMP_ARRAYCPY_DESTELEMENTPAST24]], i8 [[TMP96]], i8 [[TMP99]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP101]] = extractvalue { i8, i1 } [[TMP100]], 0 +// CHECK1-NEXT: [[TMP102:%.*]] = extractvalue { i8, i1 } [[TMP100]], 1 +// CHECK1-NEXT: br i1 [[TMP102]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT33]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST24]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE34:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP87]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_BODY23]] -// CHECK1: omp.arraycpy.done35: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT31]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST24]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT32]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST23]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE33:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT31]], [[TMP94]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE33]], label [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_BODY22]] +// CHECK1: omp.arraycpy.done34: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP96:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP96]]) +// CHECK1-NEXT: [[TMP103:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP103]]) // CHECK1-NEXT: ret void // // @@ -405,291 +424,292 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i8** noundef [[ARGV:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t.0], align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[_TMP5:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t.1], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x i8*], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP28:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP27:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 // CHECK1-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 9, i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[TMP1]], i64* [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP2]], i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: store i64 [[TMP8]], i64* [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP3]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 0 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP6]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8*, i8** [[TMP7]], i64 9 -// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[TMP8]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint i8* [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint i8* [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: [[TMP12:%.*]] = sdiv exact i64 [[TMP11]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = add nuw i64 [[TMP12]], 1 -// CHECK1-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP15:%.*]] = call i8* @llvm.stacksave() -// CHECK1-NEXT: store i8* [[TMP15]], i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP13]], align 16 -// CHECK1-NEXT: store i64 [[TMP13]], i64* [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[VLA]], [[TMP16]] +// CHECK1-NEXT: store i32 0, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i8**, i8*** [[TMP7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP10]], i64 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP11]], i64 0 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP13]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i8**, i8*** [[TMP7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8*, i8** [[TMP14]], i64 9 +// CHECK1-NEXT: [[TMP15:%.*]] = load i8*, i8** [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP15]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP16:%.*]] = ptrtoint i8* [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint i8* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP18:%.*]] = sub i64 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: [[TMP19:%.*]] = sdiv exact i64 [[TMP18]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP20:%.*]] = add nuw i64 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP22:%.*]] = call i8* @llvm.stacksave() +// CHECK1-NEXT: store i8* [[TMP22]], i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP20]], align 16 +// CHECK1-NEXT: store i64 [[TMP20]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP20]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[VLA]], [[TMP23]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP16]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP23]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP17:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i8*, i8** [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = ptrtoint i8* [[TMP18]] to i64 -// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint i8* [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = sub i64 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: [[TMP22:%.*]] = sdiv exact i64 [[TMP21]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP22]] -// CHECK1-NEXT: store i8** [[_TMP6]], i8*** [[_TMP5]], align 8 -// CHECK1-NEXT: store i8* [[TMP23]], i8** [[_TMP6]], align 8 -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], [2 x %struct.kmp_taskred_input_t.0]* [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0:%.*]], %struct.kmp_taskred_input_t.0* [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP25:%.*]] = bitcast i32* [[ARGC1]] to i8* -// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], %struct.kmp_taskred_input_t.0* [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], %struct.kmp_taskred_input_t.0* [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, i64* [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], %struct.kmp_taskred_input_t.0* [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init..4 to i8*), i8** [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], %struct.kmp_taskred_input_t.0* [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store i8* null, i8** [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], %struct.kmp_taskred_input_t.0* [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb..5 to i8*), i8** [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], %struct.kmp_taskred_input_t.0* [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: [[TMP33:%.*]] = bitcast i32* [[TMP32]] to i8* -// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP33]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], [2 x %struct.kmp_taskred_input_t.0]* [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], %struct.kmp_taskred_input_t.0* [[DOTRD_INPUT_GEP_7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP35:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8*, i8** [[TMP35]], i64 0 -// CHECK1-NEXT: [[TMP36:%.*]] = load i8*, i8** [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, i8* [[TMP36]], i64 0 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = sext i32 [[TMP37]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP38]] -// CHECK1-NEXT: [[TMP39:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8*, i8** [[TMP39]], i64 9 -// CHECK1-NEXT: [[TMP40:%.*]] = load i8*, i8** [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, i8* [[TMP40]], i64 [[LB_ADD_LEN10]] -// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP34]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], %struct.kmp_taskred_input_t.0* [[DOTRD_INPUT_GEP_7]], i32 0, i32 1 -// CHECK1-NEXT: store i8* [[ARRAYIDX9]], i8** [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP42:%.*]] = ptrtoint i8* [[ARRAYIDX12]] to i64 -// CHECK1-NEXT: [[TMP43:%.*]] = ptrtoint i8* [[ARRAYIDX9]] to i64 -// CHECK1-NEXT: [[TMP44:%.*]] = sub i64 [[TMP42]], [[TMP43]] -// CHECK1-NEXT: [[TMP45:%.*]] = sdiv exact i64 [[TMP44]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP46:%.*]] = add nuw i64 [[TMP45]], 1 -// CHECK1-NEXT: [[TMP47:%.*]] = mul nuw i64 [[TMP46]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], %struct.kmp_taskred_input_t.0* [[DOTRD_INPUT_GEP_7]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP47]], i64* [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], %struct.kmp_taskred_input_t.0* [[DOTRD_INPUT_GEP_7]], i32 0, i32 3 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init..6 to i8*), i8** [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], %struct.kmp_taskred_input_t.0* [[DOTRD_INPUT_GEP_7]], i32 0, i32 4 -// CHECK1-NEXT: store i8* null, i8** [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], %struct.kmp_taskred_input_t.0* [[DOTRD_INPUT_GEP_7]], i32 0, i32 5 -// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb..7 to i8*), i8** [[TMP51]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], %struct.kmp_taskred_input_t.0* [[DOTRD_INPUT_GEP_7]], i32 0, i32 6 -// CHECK1-NEXT: store i32 1, i32* [[TMP52]], align 8 -// CHECK1-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4 -// CHECK1-NEXT: [[TMP55:%.*]] = bitcast [2 x %struct.kmp_taskred_input_t.0]* [[DOTRD_INPUT_]] to i8* -// CHECK1-NEXT: [[TMP56:%.*]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @[[GLOB1]], i32 [[TMP54]], i32 1, i32 2, i8* [[TMP55]]) -// CHECK1-NEXT: store i8* [[TMP56]], i8** [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP58:%.*]] = load i32, i32* [[TMP57]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP58]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP59:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP59]], 9 +// CHECK1-NEXT: [[TMP24:%.*]] = load i8**, i8*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i8*, i8** [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = ptrtoint i8* [[TMP25]] to i64 +// CHECK1-NEXT: [[TMP27:%.*]] = ptrtoint i8* [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP28:%.*]] = sub i64 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: [[TMP29:%.*]] = sdiv exact i64 [[TMP28]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr i8, i8* [[VLA]], i64 [[TMP29]] +// CHECK1-NEXT: store i8** [[_TMP5]], i8*** [[_TMP4]], align 8 +// CHECK1-NEXT: store i8* [[TMP30]], i8** [[_TMP5]], align 8 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.1], [2 x %struct.kmp_taskred_input_t.1]* [[DOTRD_INPUT_]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1:%.*]], %struct.kmp_taskred_input_t.1* [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP32:%.*]] = bitcast i32* [[ARGC]] to i8* +// CHECK1-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], %struct.kmp_taskred_input_t.1* [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP6]] to i8* +// CHECK1-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], %struct.kmp_taskred_input_t.1* [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, i64* [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], %struct.kmp_taskred_input_t.1* [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init..4 to i8*), i8** [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], %struct.kmp_taskred_input_t.1* [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store i8* null, i8** [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], %struct.kmp_taskred_input_t.1* [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb..5 to i8*), i8** [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], %struct.kmp_taskred_input_t.1* [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP40:%.*]] = bitcast i32* [[TMP39]] to i8* +// CHECK1-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP40]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.1], [2 x %struct.kmp_taskred_input_t.1]* [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], %struct.kmp_taskred_input_t.1* [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP42:%.*]] = load i8**, i8*** [[TMP7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8*, i8** [[TMP42]], i64 0 +// CHECK1-NEXT: [[TMP43:%.*]] = load i8*, i8** [[ARRAYIDX7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, i8* [[TMP43]], i64 0 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = sext i32 [[TMP44]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP45]] +// CHECK1-NEXT: [[TMP46:%.*]] = load i8**, i8*** [[TMP7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i8*, i8** [[TMP46]], i64 9 +// CHECK1-NEXT: [[TMP47:%.*]] = load i8*, i8** [[ARRAYIDX10]], align 8 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, i8* [[TMP47]], i64 [[LB_ADD_LEN9]] +// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP41]], align 8 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], %struct.kmp_taskred_input_t.1* [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 +// CHECK1-NEXT: store i8* [[ARRAYIDX8]], i8** [[TMP48]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = ptrtoint i8* [[ARRAYIDX11]] to i64 +// CHECK1-NEXT: [[TMP50:%.*]] = ptrtoint i8* [[ARRAYIDX8]] to i64 +// CHECK1-NEXT: [[TMP51:%.*]] = sub i64 [[TMP49]], [[TMP50]] +// CHECK1-NEXT: [[TMP52:%.*]] = sdiv exact i64 [[TMP51]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP53:%.*]] = add nuw i64 [[TMP52]], 1 +// CHECK1-NEXT: [[TMP54:%.*]] = mul nuw i64 [[TMP53]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) +// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], %struct.kmp_taskred_input_t.1* [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP54]], i64* [[TMP55]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], %struct.kmp_taskred_input_t.1* [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_init..6 to i8*), i8** [[TMP56]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], %struct.kmp_taskred_input_t.1* [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 +// CHECK1-NEXT: store i8* null, i8** [[TMP57]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], %struct.kmp_taskred_input_t.1* [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 +// CHECK1-NEXT: store i8* bitcast (void (i8*, i8*)* @.red_comb..7 to i8*), i8** [[TMP58]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], %struct.kmp_taskred_input_t.1* [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 +// CHECK1-NEXT: store i32 1, i32* [[TMP59]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = load i32, i32* [[TMP60]], align 4 +// CHECK1-NEXT: [[TMP62:%.*]] = bitcast [2 x %struct.kmp_taskred_input_t.1]* [[DOTRD_INPUT_]] to i8* +// CHECK1-NEXT: [[TMP63:%.*]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @[[GLOB1]], i32 [[TMP61]], i32 1, i32 2, i8* [[TMP62]]) +// CHECK1-NEXT: store i8* [[TMP63]], i8** [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP65:%.*]] = load i32, i32* [[TMP64]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP65]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP66:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP66]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP60:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP67:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP60]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP67]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP61]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP68:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP68]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP62:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 [[TMP62]], [[TMP63]] -// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP69:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP70:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP69]], [[TMP70]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP64:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP64]], 1 +// CHECK1-NEXT: [[TMP71:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP71]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL]] // CHECK1-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store i8** [[DOTTASK_RED_]], i8*** [[TMP65]], align 8 -// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[ARGC1]], i32** [[TMP66]], align 8 -// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP68:%.*]] = load i8**, i8*** [[_TMP5]], align 8 -// CHECK1-NEXT: store i8** [[TMP68]], i8*** [[TMP67]], align 8 -// CHECK1-NEXT: [[TMP69:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP70:%.*]] = load i32, i32* [[TMP69]], align 4 -// CHECK1-NEXT: [[TMP71:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP70]], i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP72:%.*]] = bitcast i8* [[TMP71]] to %struct.kmp_task_t_with_privates* -// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP72]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP73]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP75:%.*]] = load i8*, i8** [[TMP74]], align 8 -// CHECK1-NEXT: [[TMP76:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP75]], i8* align 8 [[TMP76]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP72]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP77]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP79:%.*]] = load i8*, i8** [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: store i8* [[TMP79]], i8** [[TMP78]], align 8 -// CHECK1-NEXT: [[TMP80:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP81:%.*]] = load i32, i32* [[TMP80]], align 4 -// CHECK1-NEXT: [[TMP82:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP81]], i8* [[TMP71]]) +// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store i8** [[DOTTASK_RED_]], i8*** [[TMP72]], align 8 +// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[ARGC]], i32** [[TMP73]], align 8 +// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP75:%.*]] = load i8**, i8*** [[_TMP4]], align 8 +// CHECK1-NEXT: store i8** [[TMP75]], i8*** [[TMP74]], align 8 +// CHECK1-NEXT: [[TMP76:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP77:%.*]] = load i32, i32* [[TMP76]], align 4 +// CHECK1-NEXT: [[TMP78:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP77]], i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP79:%.*]] = bitcast i8* [[TMP78]] to %struct.kmp_task_t_with_privates* +// CHECK1-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP79]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP80]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP82:%.*]] = load i8*, i8** [[TMP81]], align 8 +// CHECK1-NEXT: [[TMP83:%.*]] = bitcast %struct.anon.2* [[AGG_CAPTURED]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP82]], i8* align 8 [[TMP83]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP84:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP79]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP85:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP84]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP86:%.*]] = load i8*, i8** [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: store i8* [[TMP86]], i8** [[TMP85]], align 8 +// CHECK1-NEXT: [[TMP87:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP88:%.*]] = load i32, i32* [[TMP87]], align 4 +// CHECK1-NEXT: [[TMP89:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP88]], i8* [[TMP78]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP83:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP83]], 1 -// CHECK1-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP90:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP90]], 1 +// CHECK1-NEXT: store i64 [[ADD13]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP84:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP85:%.*]] = load i32, i32* [[TMP84]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP85]]) -// CHECK1-NEXT: [[TMP86:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP87:%.*]] = load i32, i32* [[TMP86]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP87]], i32 1) -// CHECK1-NEXT: [[TMP88:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP89:%.*]] = bitcast i32* [[ARGC1]] to i8* -// CHECK1-NEXT: store i8* [[TMP89]], i8** [[TMP88]], align 8 -// CHECK1-NEXT: [[TMP90:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP90]], align 8 -// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP92:%.*]] = inttoptr i64 [[TMP13]] to i8* -// CHECK1-NEXT: store i8* [[TMP92]], i8** [[TMP91]], align 8 +// CHECK1-NEXT: [[TMP91:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP92:%.*]] = load i32, i32* [[TMP91]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP92]]) // CHECK1-NEXT: [[TMP93:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP94:%.*]] = load i32, i32* [[TMP93]], align 4 -// CHECK1-NEXT: [[TMP95:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP96:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB4]], i32 [[TMP94]], i32 2, i64 24, i8* [[TMP95]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP96]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP94]], i32 1) +// CHECK1-NEXT: [[TMP95:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP96:%.*]] = bitcast i32* [[ARGC]] to i8* +// CHECK1-NEXT: store i8* [[TMP96]], i8** [[TMP95]], align 8 +// CHECK1-NEXT: [[TMP97:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store i8* [[VLA]], i8** [[TMP97]], align 8 +// CHECK1-NEXT: [[TMP98:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP99:%.*]] = inttoptr i64 [[TMP20]] to i8* +// CHECK1-NEXT: store i8* [[TMP99]], i8** [[TMP98]], align 8 +// CHECK1-NEXT: [[TMP100:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP101:%.*]] = load i32, i32* [[TMP100]], align 4 +// CHECK1-NEXT: [[TMP102:%.*]] = bitcast [3 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP103:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB4]], i32 [[TMP101]], i32 2, i64 24, i8* [[TMP102]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP103]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP97:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP98:%.*]] = load i32, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP97]], [[TMP98]] -// CHECK1-NEXT: store i32 [[ADD15]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP99:%.*]] = getelementptr i8, i8* [[ARRAYIDX2]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[ARRAYIDX2]], [[TMP99]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE22:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP104:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP105:%.*]] = load i32, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP104]], [[TMP105]] +// CHECK1-NEXT: store i32 [[ADD14]], i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP106:%.*]] = getelementptr i8, i8* [[ARRAYIDX1]], i64 [[TMP20]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[ARRAYIDX1]], [[TMP106]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi i8* [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP100:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP100]] to i32 -// CHECK1-NEXT: [[TMP101:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP101]] to i32 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV]], [[CONV17]] -// CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK1-NEXT: store i8 [[CONV19]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi i8* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP107:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP107]] to i32 +// CHECK1-NEXT: [[TMP108:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV16:%.*]] = sext i8 [[TMP108]] to i32 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV]], [[CONV16]] +// CHECK1-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK1-NEXT: store i8 [[CONV18]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP99]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done22: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB4]], i32 [[TMP94]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP106]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done21: +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB4]], i32 [[TMP101]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP102:%.*]] = load i32, i32* [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP103:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP102]] monotonic, align 4 -// CHECK1-NEXT: [[TMP104:%.*]] = getelementptr i8, i8* [[ARRAYIDX2]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY23:%.*]] = icmp eq i8* [[ARRAYIDX2]], [[TMP104]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY23]], label [[OMP_ARRAYCPY_DONE36:%.*]], label [[OMP_ARRAYCPY_BODY24:%.*]] -// CHECK1: omp.arraycpy.body24: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST25:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT34:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST26:%.*]] = phi i8* [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT33:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP105:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV27:%.*]] = sext i8 [[TMP105]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST26]] monotonic, align 1 +// CHECK1-NEXT: [[TMP109:%.*]] = load i32, i32* [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP110:%.*]] = atomicrmw add i32* [[TMP6]], i32 [[TMP109]] monotonic, align 4 +// CHECK1-NEXT: [[TMP111:%.*]] = getelementptr i8, i8* [[ARRAYIDX1]], i64 [[TMP20]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY22:%.*]] = icmp eq i8* [[ARRAYIDX1]], [[TMP111]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY22]], label [[OMP_ARRAYCPY_DONE35:%.*]], label [[OMP_ARRAYCPY_BODY23:%.*]] +// CHECK1: omp.arraycpy.body23: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST24:%.*]] = phi i8* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT33:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST25:%.*]] = phi i8* [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], [[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[TMP112:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 +// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP112]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST25]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP106:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY24]] ], [ [[TMP111:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP106]], i8* [[_TMP28]], align 1 -// CHECK1-NEXT: [[TMP107:%.*]] = load i8, i8* [[_TMP28]], align 1 -// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP107]] to i32 -// CHECK1-NEXT: [[TMP108:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV30:%.*]] = sext i8 [[TMP108]] to i32 -// CHECK1-NEXT: [[ADD31:%.*]] = add nsw i32 [[CONV29]], [[CONV30]] -// CHECK1-NEXT: [[CONV32:%.*]] = trunc i32 [[ADD31]] to i8 -// CHECK1-NEXT: store i8 [[CONV32]], i8* [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP109:%.*]] = load i8, i8* [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP110:%.*]] = cmpxchg i8* [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i8 [[TMP106]], i8 [[TMP109]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP111]] = extractvalue { i8, i1 } [[TMP110]], 0 -// CHECK1-NEXT: [[TMP112:%.*]] = extractvalue { i8, i1 } [[TMP110]], 1 -// CHECK1-NEXT: br i1 [[TMP112]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP113:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY23]] ], [ [[TMP118:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP113]], i8* [[_TMP27]], align 1 +// CHECK1-NEXT: [[TMP114:%.*]] = load i8, i8* [[_TMP27]], align 1 +// CHECK1-NEXT: [[CONV28:%.*]] = sext i8 [[TMP114]] to i32 +// CHECK1-NEXT: [[TMP115:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 +// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP115]] to i32 +// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV28]], [[CONV29]] +// CHECK1-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 +// CHECK1-NEXT: store i8 [[CONV31]], i8* [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP116:%.*]] = load i8, i8* [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP117:%.*]] = cmpxchg i8* [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i8 [[TMP113]], i8 [[TMP116]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP118]] = extractvalue { i8, i1 } [[TMP117]], 0 +// CHECK1-NEXT: [[TMP119:%.*]] = extractvalue { i8, i1 } [[TMP117]], 1 +// CHECK1-NEXT: br i1 [[TMP119]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT33]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT34]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST25]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE35:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT33]], [[TMP104]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_DONE36]], label [[OMP_ARRAYCPY_BODY24]] -// CHECK1: omp.arraycpy.done36: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT33]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST24]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE34:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP111]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_BODY23]] +// CHECK1: omp.arraycpy.done35: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP113:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP113]]) +// CHECK1-NEXT: [[TMP120:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP120]]) // CHECK1-NEXT: ret void // // @@ -799,7 +819,7 @@ // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[TMP_I:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[TMP4_I:%.*]] = alloca i8*, align 8 @@ -813,7 +833,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.2* // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -826,29 +846,29 @@ // CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* // CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* // CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP24:%.*]] = load i8**, i8*** [[TMP23]], align 8 // CHECK1-NEXT: [[TMP25:%.*]] = load i8*, i8** [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP26]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 // CHECK1-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP29]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP31:%.*]] = load i8**, i8*** [[TMP30]], align 8 // CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds i8*, i8** [[TMP31]], i64 9 // CHECK1-NEXT: [[TMP32:%.*]] = load i8*, i8** [[ARRAYIDX2_I]], align 8 @@ -859,10 +879,10 @@ // CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 // CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8 +// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8, !noalias !12 // CHECK1-NEXT: [[TMP39:%.*]] = load i8*, i8** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP40:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP39]], i8* [[TMP25]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP42:%.*]] = load i8**, i8*** [[TMP41]], align 8 // CHECK1-NEXT: [[TMP43:%.*]] = load i8*, i8** [[TMP42]], align 8 // CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint i8* [[TMP43]] to i64 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp @@ -347,83 +347,97 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -433,64 +447,68 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -498,83 +516,97 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -584,64 +616,68 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -649,83 +685,97 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -735,85 +785,89 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[CONV2]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP13]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -821,83 +875,97 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -907,55 +975,59 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -970,83 +1042,97 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1056,55 +1142,59 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -1261,81 +1351,95 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP14]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1345,61 +1449,65 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1407,81 +1515,95 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP14]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1491,61 +1613,65 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1553,81 +1679,95 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP14]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1637,80 +1777,84 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP7]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP15]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP21]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1718,81 +1862,95 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP14]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1802,52 +1960,56 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP18]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK3-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: @@ -1862,81 +2024,95 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP14]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1946,52 +2122,56 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP18]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK3-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK3: omp.inner.for.end: @@ -2148,83 +2328,97 @@ // CHECK5-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2234,64 +2428,68 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK5-NEXT: ret void // // @@ -2299,83 +2497,97 @@ // CHECK5-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2385,64 +2597,68 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK5-NEXT: ret void // // @@ -2450,83 +2666,97 @@ // CHECK5-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2536,85 +2766,89 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[CONV2]] // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP13]] to i32 // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK5-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK5-NEXT: ret void // // @@ -2622,83 +2856,97 @@ // CHECK5-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2708,55 +2956,59 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK5-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -2771,83 +3023,97 @@ // CHECK5-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 8 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2857,55 +3123,59 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK5-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -3062,81 +3332,95 @@ // CHECK7-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP14]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3146,61 +3430,65 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP17]] // CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK7-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK7-NEXT: ret void // // @@ -3208,81 +3496,95 @@ // CHECK7-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP14]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3292,61 +3594,65 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP17]] // CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK7-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK7-NEXT: ret void // // @@ -3354,81 +3660,95 @@ // CHECK7-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP14]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3438,80 +3758,84 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK7-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ [[TMP7]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK7-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP15]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP21]] // CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK7-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK7-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK7-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK7-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK7: omp.dispatch.end: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK7-NEXT: ret void // // @@ -3519,81 +3843,95 @@ // CHECK7-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP14]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3603,52 +3941,56 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK7-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK7-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP18]] // CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK7: omp.inner.for.end: @@ -3663,81 +4005,95 @@ // CHECK7-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP14]], align 4 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3747,52 +4103,56 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK7-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK7-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP18]] // CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK7: omp.inner.for.end: @@ -4182,24 +4542,29 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i32* [[CONV]], i32** [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], i64* [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4209,90 +4574,103 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP24]], align 8 +// CHECK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP25]], align 8 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 8 +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[TMP27]], align 8 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store i32* [[TMP6]], i32** [[TMP28]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK13-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4305,84 +4683,88 @@ // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK13-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void @@ -4394,24 +4776,29 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i32* [[CONV]], i32** [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], i64* [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4421,90 +4808,103 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP24]], align 8 +// CHECK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP25]], align 8 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 8 +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[TMP27]], align 8 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store i32* [[TMP6]], i32** [[TMP28]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK13-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4517,84 +4917,88 @@ // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK13-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void @@ -4608,7 +5012,7 @@ // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK13-NEXT: store i64 [[M]], i64* [[M_ADDR]], align 8 // CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -4619,23 +5023,26 @@ // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]]) +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i32* [[CONV1]], i32** [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -4645,123 +5052,135 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4 -// CHECK13-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]]) +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK13-NEXT: store i64 [[TMP24]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK13-NEXT: store i64 [[TMP26]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP27]], align 8 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP28]], align 8 +// CHECK13-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i32* [[TMP2]], i32** [[TMP29]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[TMP30]], align 8 +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store i32* [[TMP6]], i32** [[TMP31]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP33]], i32* [[TMP32]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] -// CHECK13-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] -// CHECK13-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] -// CHECK13: cond.true12: -// CHECK13-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: br label [[COND_END14:%.*]] -// CHECK13: cond.false13: -// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: br label [[COND_END14]] -// CHECK13: cond.end14: -// CHECK13-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE12]] ], [ [[TMP32]], [[COND_FALSE13]] ] -// CHECK13-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] +// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] +// CHECK13-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] +// CHECK13: cond.true11: +// CHECK13-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: br label [[COND_END13:%.*]] +// CHECK13: cond.false12: +// CHECK13-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: br label [[COND_END13]] +// CHECK13: cond.end13: +// CHECK13-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE11]] ], [ [[TMP43]], [[COND_FALSE12]] ] +// CHECK13-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP44]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK13-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP46]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -4771,89 +5190,94 @@ // CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 8 +// CHECK13-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK13-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK13-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 +// CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] -// CHECK13-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] +// CHECK13-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK13-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void @@ -4865,24 +5289,29 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i32* [[CONV]], i32** [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], i64* [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4892,90 +5321,103 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP24]], align 8 +// CHECK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP25]], align 8 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 8 +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[TMP27]], align 8 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store i32* [[TMP6]], i32** [[TMP28]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK13-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4988,73 +5430,77 @@ // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP20]], i32 1073741859, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP22]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !15 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !15 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK13-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -5075,7 +5521,7 @@ // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK13-NEXT: store i64 [[M]], i64* [[M_ADDR]], align 8 // CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -5086,23 +5532,26 @@ // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]]) +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i32* [[CONV1]], i32** [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -5112,98 +5561,110 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP20]], i32* [[CONV7]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP21]]) +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK13-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP26]], align 8 +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP27]], align 8 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i32* [[TMP2]], i32** [[TMP28]], align 8 +// CHECK13-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[TMP29]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store i32* [[TMP6]], i32** [[TMP30]], align 8 +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP32]], i32* [[TMP31]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) +// CHECK13-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -5213,80 +5674,85 @@ // CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 8 +// CHECK13-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK13-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK13-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 +// CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], i32 1073741859, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 [[TMP9]]) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP23]], i32 1073741859, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK13-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP25]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] +// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !18 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !18 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -5449,83 +5915,97 @@ // CHECK13-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.12*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK13-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8 +// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.13*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5535,63 +6015,67 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK13-NEXT: ret void // // @@ -5599,83 +6083,97 @@ // CHECK13-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK13-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8 +// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.15*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.15* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.15*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5685,63 +6183,67 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.15* [[__CONTEXT]], %struct.anon.15** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.15*, %struct.anon.15** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], %struct.anon.15* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK13-NEXT: ret void // // @@ -5751,101 +6253,112 @@ // CHECK13-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 8 // CHECK13-NEXT: store i64 [[M]], i64* [[M_ADDR]], align 8 // CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.16*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.16* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.16*, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK13-NEXT: store %struct.anon.16* [[__CONTEXT]], %struct.anon.16** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], %struct.anon.16* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK13-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]) +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK13-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP16]], align 8 +// CHECK13-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP17]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP20]], i32* [[TMP19]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.18*)* @.omp_outlined..27 to void (i32*, i32*, ...)*), %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.18* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.18*, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5855,87 +6368,92 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK13-NEXT: store %struct.anon.18* [[__CONTEXT]], %struct.anon.18** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.18*, %struct.anon.18** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], %struct.anon.18* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK13-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[CONV2]] // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK13-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK13-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP13]]) // CHECK13-NEXT: ret void // // @@ -5943,83 +6461,97 @@ // CHECK13-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 8 // CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.20*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.20* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.20*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.20* [[__CONTEXT]], %struct.anon.20** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.20*, %struct.anon.20** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], %struct.anon.20* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK13-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8 +// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.21*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.21* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.21*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6029,54 +6561,58 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.21* [[__CONTEXT]], %struct.anon.21** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.21*, %struct.anon.21** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], %struct.anon.21* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK13-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK13-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -6093,101 +6629,112 @@ // CHECK13-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 8 // CHECK13-NEXT: store i64 [[M]], i64* [[M_ADDR]], align 8 // CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.22*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.22* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.22*, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK13-NEXT: store %struct.anon.22* [[__CONTEXT]], %struct.anon.22** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.22*, %struct.anon.22** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], %struct.anon.22* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK13-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]) +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK13-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP16]], align 8 +// CHECK13-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP17]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP20]], i32* [[TMP19]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.24*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.24* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.24*, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6197,58 +6744,63 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK13-NEXT: store %struct.anon.24* [[__CONTEXT]], %struct.anon.24** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.24*, %struct.anon.24** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_24:%.*]], %struct.anon.24* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK13-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 1073741859, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32 1073741859, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK13-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK13-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -6634,23 +7186,28 @@ // CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store i32* [[N_ADDR]], i32** [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], i32* [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -6660,88 +7217,101 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK15-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP22]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP23]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store i32* [[TMP2]], i32** [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[TMP25]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store i32* [[TMP6]], i32** [[TMP26]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) +// CHECK15-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -6754,81 +7324,85 @@ // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK15-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP27]] // CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK15-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK15-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void @@ -6840,23 +7414,28 @@ // CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store i32* [[N_ADDR]], i32** [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], i32* [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -6866,88 +7445,101 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK15-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP22]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP23]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store i32* [[TMP2]], i32** [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[TMP25]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store i32* [[TMP6]], i32** [[TMP26]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) +// CHECK15-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -6960,81 +7552,85 @@ // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK15-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP27]] // CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK15-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK15-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void @@ -7048,7 +7644,7 @@ // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK15-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 // CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -7057,22 +7653,26 @@ // CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[M_ADDR]], align 4 // CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]]) +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store i32* [[N_ADDR]], i32** [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -7082,119 +7682,133 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK15-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK15-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK15-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]]) +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP24]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP25]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP26]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store i32* [[TMP2]], i32** [[TMP27]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[TMP28]], align 4 +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store i32* [[TMP6]], i32** [[TMP29]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP31]], i32* [[TMP30]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK15-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK15-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK15-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK15-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK15-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] // CHECK15-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK15: cond.true11: -// CHECK15-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: br label [[COND_END13:%.*]] // CHECK15: cond.false12: -// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END13]] // CHECK15: cond.end13: -// CHECK15-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE11]] ], [ [[TMP30]], [[COND_FALSE12]] ] +// CHECK15-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE11]] ], [ [[TMP41]], [[COND_FALSE12]] ] // CHECK15-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP42]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK15-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP44]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -7207,82 +7821,88 @@ // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK15-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK15-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK15-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP29]] // CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK15-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK15-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void @@ -7294,23 +7914,28 @@ // CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store i32* [[N_ADDR]], i32** [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], i32* [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7320,88 +7945,101 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK15-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP22]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP23]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store i32* [[TMP2]], i32** [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[TMP25]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store i32* [[TMP6]], i32** [[TMP26]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) +// CHECK15-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7414,70 +8052,74 @@ // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK15-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP20]], i32 1073741859, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP22]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK15-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !16 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP20]] +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP28]] // CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK15-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -7498,7 +8140,7 @@ // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK15-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 // CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -7507,22 +8149,26 @@ // CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[M_ADDR]], align 4 // CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]]) +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store i32* [[N_ADDR]], i32** [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -7532,94 +8178,108 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK15-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK15-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP18]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP19]]) +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP25]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[TMP27]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store i32* [[TMP6]], i32** [[TMP28]], align 4 +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP30]], i32* [[TMP29]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK15-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -7632,72 +8292,78 @@ // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK15-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], i32 1073741859, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 [[TMP9]]) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP23]], i32 1073741859, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP25]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !19 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP21]] +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP31]] // CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK15-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -7859,81 +8525,95 @@ // CHECK15-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP14]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7943,60 +8623,64 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP17]] // CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK15-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK15-NEXT: ret void // // @@ -8004,81 +8688,95 @@ // CHECK15-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.11*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], %struct.anon.11* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP14]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.12*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8088,60 +8786,64 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP17]] // CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK15-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK15-NEXT: ret void // // @@ -8151,95 +8853,109 @@ // CHECK15-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 4 // CHECK15-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 // CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[M_ADDR]], align 4 // CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.13*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]) +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP14]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP15]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], i32* [[TMP17]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..27 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8249,81 +8965,87 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK15-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP16]] +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP24]] // CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK15-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK15: omp.dispatch.inc: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK15-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK15-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK15: omp.dispatch.end: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP13]]) // CHECK15-NEXT: ret void // // @@ -8331,81 +9053,95 @@ // CHECK15-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.15*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.15* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.15*, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon.15* [[__CONTEXT]], %struct.anon.15** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.15*, %struct.anon.15** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], %struct.anon.15* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP14]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.16*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.16* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.16*, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8415,51 +9151,55 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon.16* [[__CONTEXT]], %struct.anon.16** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], %struct.anon.16* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK15-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !22 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP12]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP18]] // CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -8476,95 +9216,109 @@ // CHECK15-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK15-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 // CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[M_ADDR]], align 4 // CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.17*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.17* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.17*, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon.17* [[__CONTEXT]], %struct.anon.17** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.17*, %struct.anon.17** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], %struct.anon.17* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]) +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP14]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP15]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], i32* [[TMP17]], align 4 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.18*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.18* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.18*, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8574,53 +9328,59 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store %struct.anon.18* [[__CONTEXT]], %struct.anon.18** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load %struct.anon.18*, %struct.anon.18** [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], %struct.anon.18* [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 1073741859, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32 1073741859, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK15-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP13]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP21]] // CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -9010,24 +9770,29 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i32* [[CONV]], i32** [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64 [[TMP0]], i64* [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9037,90 +9802,103 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK17-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP24]], align 8 +// CHECK17-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP25]], align 8 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 8 +// CHECK17-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[TMP27]], align 8 +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i32* [[TMP6]], i32** [[TMP28]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9133,84 +9911,88 @@ // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK17-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void @@ -9222,24 +10004,29 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i32* [[CONV]], i32** [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64 [[TMP0]], i64* [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9249,90 +10036,103 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK17-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP24]], align 8 +// CHECK17-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP25]], align 8 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 8 +// CHECK17-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[TMP27]], align 8 +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i32* [[TMP6]], i32** [[TMP28]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9345,84 +10145,88 @@ // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK17-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void @@ -9436,7 +10240,7 @@ // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK17-NEXT: store i64 [[M]], i64* [[M_ADDR]], align 8 // CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -9447,23 +10251,26 @@ // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]]) +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i32* [[CONV1]], i32** [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -9473,123 +10280,135 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4 -// CHECK17-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]]) +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK17-NEXT: store i64 [[TMP24]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK17-NEXT: store i64 [[TMP26]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP27]], align 8 +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP28]], align 8 +// CHECK17-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i32* [[TMP2]], i32** [[TMP29]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[TMP30]], align 8 +// CHECK17-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i32* [[TMP6]], i32** [[TMP31]], align 8 +// CHECK17-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP33]], i32* [[TMP32]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] -// CHECK17-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] -// CHECK17-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] -// CHECK17: cond.true12: -// CHECK17-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: br label [[COND_END14:%.*]] -// CHECK17: cond.false13: -// CHECK17-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: br label [[COND_END14]] -// CHECK17: cond.end14: -// CHECK17-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE12]] ], [ [[TMP32]], [[COND_FALSE13]] ] -// CHECK17-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] +// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] +// CHECK17-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] +// CHECK17: cond.true11: +// CHECK17-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: br label [[COND_END13:%.*]] +// CHECK17: cond.false12: +// CHECK17-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: br label [[COND_END13]] +// CHECK17: cond.end13: +// CHECK17-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE11]] ], [ [[TMP43]], [[COND_FALSE12]] ] +// CHECK17-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP44]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK17-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP46]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -9599,89 +10418,94 @@ // CHECK17-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 8 +// CHECK17-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK17-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 +// CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] -// CHECK17-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] +// CHECK17-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK17-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void @@ -9693,24 +10517,29 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i32* [[CONV]], i32** [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64 [[TMP0]], i64* [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9720,90 +10549,103 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK17-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP24]], align 8 +// CHECK17-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP25]], align 8 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 8 +// CHECK17-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[TMP27]], align 8 +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i32* [[TMP6]], i32** [[TMP28]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9816,73 +10658,77 @@ // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP20]], i32 35, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP22]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !15 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !15 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !15 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK17-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -9903,7 +10749,7 @@ // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK17-NEXT: store i64 [[M]], i64* [[M_ADDR]], align 8 // CHECK17-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -9914,23 +10760,26 @@ // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]]) +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i32* [[CONV1]], i32** [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -9940,98 +10789,110 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP20]], i32* [[CONV7]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP21]]) +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK17-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP26]], align 8 +// CHECK17-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP27]], align 8 +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i32* [[TMP2]], i32** [[TMP28]], align 8 +// CHECK17-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP4]], i64* [[TMP29]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i32* [[TMP6]], i32** [[TMP30]], align 8 +// CHECK17-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP32]], i32* [[TMP31]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) +// CHECK17-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -10041,80 +10902,85 @@ // CHECK17-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 8 +// CHECK17-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK17-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 +// CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 [[TMP9]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP23]], i32 35, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP25]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] +// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -10277,83 +11143,97 @@ // CHECK17-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.12*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK17-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.13*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10363,63 +11243,67 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK17-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK17-NEXT: ret void // // @@ -10427,83 +11311,97 @@ // CHECK17-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK17-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.15*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.15* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.15*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10513,63 +11411,67 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.15* [[__CONTEXT]], %struct.anon.15** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.15*, %struct.anon.15** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], %struct.anon.15* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK17-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK17-NEXT: ret void // // @@ -10579,101 +11481,112 @@ // CHECK17-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 8 // CHECK17-NEXT: store i64 [[M]], i64* [[M_ADDR]], align 8 // CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.16*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.16* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.16*, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK17-NEXT: store %struct.anon.16* [[__CONTEXT]], %struct.anon.16** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], %struct.anon.16* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK17-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]) +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK17-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP17]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP20]], i32* [[TMP19]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.18*)* @.omp_outlined..27 to void (i32*, i32*, ...)*), %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.18* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.18*, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10683,87 +11596,92 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK17-NEXT: store %struct.anon.18* [[__CONTEXT]], %struct.anon.18** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.18*, %struct.anon.18** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], %struct.anon.18* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK17-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK17-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[CONV2]] // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK17-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] +// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP13]]) // CHECK17-NEXT: ret void // // @@ -10771,83 +11689,97 @@ // CHECK17-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 8 // CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.20*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.20* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.20*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.20* [[__CONTEXT]], %struct.anon.20** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.20*, %struct.anon.20** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], %struct.anon.20* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK17-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.21*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.21* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.21*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10857,54 +11789,58 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon.21* [[__CONTEXT]], %struct.anon.21** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.21*, %struct.anon.21** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], %struct.anon.21* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK17-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK17-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -10921,101 +11857,112 @@ // CHECK17-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 8 // CHECK17-NEXT: store i64 [[M]], i64* [[M_ADDR]], align 8 // CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.22*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.22* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.22*, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK17-NEXT: store %struct.anon.22* [[__CONTEXT]], %struct.anon.22** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.22*, %struct.anon.22** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], %struct.anon.22* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK17-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]) +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK17-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP17]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP20]], i32* [[TMP19]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.24*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.24* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.24*, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11025,58 +11972,63 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK17-NEXT: store %struct.anon.24* [[__CONTEXT]], %struct.anon.24** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.24*, %struct.anon.24** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_24:%.*]], %struct.anon.24* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK17-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK17-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 35, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32 35, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK17-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK17-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK17-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -11462,23 +12414,28 @@ // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32* [[N_ADDR]], i32** [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], i32* [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11488,88 +12445,101 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP22]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP23]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32* [[TMP2]], i32** [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[TMP25]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store i32* [[TMP6]], i32** [[TMP26]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) +// CHECK19-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11582,81 +12552,85 @@ // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP27]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK19-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK19-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void @@ -11668,23 +12642,28 @@ // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32* [[N_ADDR]], i32** [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], i32* [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11694,88 +12673,101 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP22]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP23]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32* [[TMP2]], i32** [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[TMP25]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store i32* [[TMP6]], i32** [[TMP26]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) +// CHECK19-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11788,81 +12780,85 @@ // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP27]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK19-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK19-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void @@ -11876,7 +12872,7 @@ // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK19-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 // CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -11885,22 +12881,26 @@ // CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[M_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32* [[N_ADDR]], i32** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -11910,119 +12910,133 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]]) +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP24]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP25]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP26]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32* [[TMP2]], i32** [[TMP27]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[TMP28]], align 4 +// CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store i32* [[TMP6]], i32** [[TMP29]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP31]], i32* [[TMP30]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK19-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK19-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK19-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK19-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK19-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] // CHECK19-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK19: cond.true11: -// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END13:%.*]] // CHECK19: cond.false12: -// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END13]] // CHECK19: cond.end13: -// CHECK19-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE11]] ], [ [[TMP30]], [[COND_FALSE12]] ] +// CHECK19-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE11]] ], [ [[TMP41]], [[COND_FALSE12]] ] // CHECK19-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP42]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK19-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP44]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -12035,82 +13049,88 @@ // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP29]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK19-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK19-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void @@ -12122,23 +13142,28 @@ // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32* [[N_ADDR]], i32** [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], i32* [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12148,88 +13173,101 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP22]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP23]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32* [[TMP2]], i32** [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[TMP25]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store i32* [[TMP6]], i32** [[TMP26]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) +// CHECK19-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12242,70 +13280,74 @@ // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP20]], i32 35, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP22]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK19-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !16 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP20]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP28]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK19-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -12326,7 +13368,7 @@ // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK19-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 // CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -12335,22 +13377,26 @@ // CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[M_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32* [[N_ADDR]], i32** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -12360,94 +13406,108 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP18]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP19]]) +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP25]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[TMP27]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store i32* [[TMP6]], i32** [[TMP28]], align 4 +// CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP30]], i32* [[TMP29]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK19-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -12460,72 +13520,78 @@ // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 [[TMP9]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP23]], i32 35, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP25]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP21]] +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP31]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK19-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -12687,81 +13753,95 @@ // CHECK19-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP14]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12771,60 +13851,64 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP17]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK19-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK19-NEXT: ret void // // @@ -12832,81 +13916,95 @@ // CHECK19-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.11*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], %struct.anon.11* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP14]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.12*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12916,60 +14014,64 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP17]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK19-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK19-NEXT: ret void // // @@ -12979,95 +14081,109 @@ // CHECK19-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 4 // CHECK19-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 // CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[M_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.13*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]) +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP14]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP15]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], i32* [[TMP17]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..27 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -13077,81 +14193,87 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP16]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP24]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK19-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK19-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK19-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP13]]) // CHECK19-NEXT: ret void // // @@ -13159,81 +14281,95 @@ // CHECK19-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.15*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.15* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.15*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.15* [[__CONTEXT]], %struct.anon.15** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.15*, %struct.anon.15** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], %struct.anon.15* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP14]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.16*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.16* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.16*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -13243,51 +14379,55 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.16* [[__CONTEXT]], %struct.anon.16** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], %struct.anon.16* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK19-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !22 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP12]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP18]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK19-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -13304,95 +14444,109 @@ // CHECK19-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK19-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 // CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[M_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.17*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.17* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.17*, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.17* [[__CONTEXT]], %struct.anon.17** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.17*, %struct.anon.17** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], %struct.anon.17* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]) +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP14]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP15]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], i32* [[TMP17]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.18*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.18* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.18*, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -13402,53 +14556,59 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.18* [[__CONTEXT]], %struct.anon.18** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.18*, %struct.anon.18** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], %struct.anon.18* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 35, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32 35, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK19-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP13]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP21]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK19-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK19: omp.inner.for.end: diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp @@ -338,6 +338,7 @@ // CHECK1-NEXT: [[TH_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) // CHECK1-NEXT: store i64 [[TE]], i64* [[TE_ADDR]], align 8 // CHECK1-NEXT: store i64 [[TH]], i64* [[TH_ADDR]], align 8 @@ -350,17 +351,20 @@ // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [100 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV2]], [100 x i32]* [[TMP1]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV2]], i32** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store [100 x i32]* [[TMP1]], [100 x i32]** [[TMP5]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], [100 x i32]* noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -370,79 +374,94 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store [100 x i32]* [[A]], [100 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, [100 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i32* [[TMP0]], [100 x i32]* [[TMP1]]), !llvm.access.group !5 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i64 [[TMP19]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP22]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP23]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[TMP2]], i32** [[TMP24]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store [100 x i32]* [[TMP4]], [100 x i32]** [[TMP25]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP29]]) +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK1-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK1-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -455,14 +474,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], [100 x i32]* noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -475,88 +491,92 @@ // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store [100 x i32]* [[A]], [100 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP1]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP8]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], 1 // CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK1-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP31]], 0 // CHECK1-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK1-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -574,114 +594,136 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 8 // CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store [100 x i32]* [[A]], [100 x i32]** [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[I]], i64* [[I_ADDR]], align 8 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[I_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, [100 x i32]*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[CONV]], i32* [[CONV1]], [100 x i32]* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[CONV1]], i32** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [100 x i32]* [[TMP0]], [100 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], [100 x i32]* noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store [100 x i32]* [[A]], [100 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[I3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK1-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: -// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP6]], i64 0, i64 0 // CHECK1-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[ARRAYDECAY]], i64 16) ] // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32*, [100 x i32]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[I4]], i32* [[TMP1]], [100 x i32]* [[TMP2]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[I3]], i32** [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i32* [[TMP4]], i32** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store [100 x i32]* [[TMP6]], [100 x i32]** [[TMP28]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK1-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK1-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP26]], 0 -// CHECK1-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP35]], 0 +// CHECK1-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[TMP2]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -690,129 +732,129 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], [100 x i32]* noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I6:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store [100 x i32]* [[A]], [100 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[I3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK1-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: -// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP10]], i64 0, i64 0 // CHECK1-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[ARRAYDECAY]], i64 16) ] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTLINEAR_START]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTLINEAR_START]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP16]] to i32 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP17]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP11]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP19]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP21]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP24]], [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP2]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP10]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK1-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK1-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP28]], 0 -// CHECK1-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 -// CHECK1-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK1-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 +// CHECK1-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[TMP6]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK1-NEXT: br i1 [[TMP30]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK1-NEXT: br i1 [[TMP38]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: @@ -980,6 +1022,7 @@ // CHECK3-NEXT: [[TH_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) // CHECK3-NEXT: store i32 [[TE]], i32* [[TE_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TH]], i32* [[TH_ADDR]], align 4 @@ -989,17 +1032,20 @@ // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TE_ADDR]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TH_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [100 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[N_ADDR]], [100 x i32]* [[TMP1]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[N_ADDR]], i32** [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store [100 x i32]* [[TMP1]], [100 x i32]** [[TMP5]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], [100 x i32]* noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1009,77 +1055,92 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store [100 x i32]* [[A]], [100 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, [100 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32* [[TMP0]], [100 x i32]* [[TMP1]]), !llvm.access.group !6 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP20]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP21]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[TMP2]], i32** [[TMP22]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store [100 x i32]* [[TMP4]], [100 x i32]** [[TMP23]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP30]], 0 // CHECK3-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -1092,14 +1153,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], [100 x i32]* noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1112,85 +1170,89 @@ // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store [100 x i32]* [[A]], [100 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP1]], i32 0, i32 [[TMP18]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP8]], i32 0, i32 [[TMP25]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP26]], 1 // CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK3-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP31]], 0 // CHECK3-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK3-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -1208,110 +1270,132 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 4 // CHECK3-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store [100 x i32]* [[A]], [100 x i32]** [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[I]], i32* [[I_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, [100 x i32]*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[I_ADDR]], i32* [[N_ADDR]], [100 x i32]* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[I_ADDR]], i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[N_ADDR]], i32** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [100 x i32]* [[TMP0]], [100 x i32]** [[TMP3]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], [100 x i32]* noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store [100 x i32]* [[A]], [100 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[I3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK3-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: -// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP6]], i32 0, i32 0 // CHECK3-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[ARRAYDECAY]], i32 16) ] // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32*, [100 x i32]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[I4]], i32* [[TMP1]], [100 x i32]* [[TMP2]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[I3]], i32** [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store i32* [[TMP4]], i32** [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store [100 x i32]* [[TMP6]], [100 x i32]** [[TMP26]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK3-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK3-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD9]], i32* [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK3-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[TMP2]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: br label [[OMP_PRECOND_END]] @@ -1320,126 +1404,126 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], [100 x i32]* noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store [100 x i32]* [[A]], [100 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[I3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK3-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: -// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP10]], i32 0, i32 0 // CHECK3-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[ARRAYDECAY]], i32 16) ] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTLINEAR_START]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP11]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP19]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP21]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP22]], [[TMP23]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP24]], [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP2]], i32 0, i32 [[TMP22]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP10]], i32 0, i32 [[TMP30]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK3-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP28]], 0 -// CHECK3-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 -// CHECK3-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 -// CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] -// CHECK3-NEXT: store i32 [[ADD12]], i32* [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK3-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 +// CHECK3-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 +// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] +// CHECK3-NEXT: store i32 [[ADD11]], i32* [[TMP6]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK3-NEXT: br i1 [[TMP30]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK3-NEXT: br i1 [[TMP38]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK3: .omp.linear.pu: // CHECK3-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK3: .omp.linear.pu.done: @@ -1826,6 +1910,7 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[I]], i64* [[I_ADDR]], align 8 @@ -1834,110 +1919,134 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[I_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]], i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 0, i32* [[I3]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: -// CHECK9-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[TMP3]], i64 16) ] +// CHECK9-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[TMP8]], i64 16) ] // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32*, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[I4]], i32* [[TMP1]], i64 [[TMP2]], i32* [[TMP3]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[I3]], i32** [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32* [[TMP4]], i32** [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i64 [[TMP6]], i64* [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store i32* [[TMP8]], i32** [[TMP31]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 -// CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD9]], i32* [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[TMP2]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -1946,131 +2055,130 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I6:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 0, i32* [[I3]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: -// CHECK9-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[TMP3]], i64 16) ] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTLINEAR_START]], align 4 +// CHECK9-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[TMP12]], i64 16) ] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTLINEAR_START]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 -// CHECK9-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP18]] to i32 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP19]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP12]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP14]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK9-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP21]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP23]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK9-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP32]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP12]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK9-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP29]], 0 -// CHECK9-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 -// CHECK9-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 -// CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK9-NEXT: store i32 [[ADD13]], i32* [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK9-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 +// CHECK9-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK9-NEXT: store i32 [[ADD12]], i32* [[TMP6]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK9-NEXT: br i1 [[TMP31]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK9-NEXT: br i1 [[TMP40]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK9: .omp.linear.pu: // CHECK9-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK9: .omp.linear.pu.done: @@ -2187,114 +2295,139 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[I]], i32* [[I_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[I_ADDR]], i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[I_ADDR]], i32** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 0, i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: -// CHECK11-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[TMP3]], i32 16) ] +// CHECK11-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[TMP8]], i32 16) ] // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32*, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[I4]], i32* [[TMP1]], i32 [[TMP2]], i32* [[TMP3]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[I3]], i32** [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32* [[TMP4]], i32** [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32* [[TMP8]], i32** [[TMP29]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD9]], i32* [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] +// CHECK11-NEXT: store i32 [[ADD8]], i32* [[TMP2]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: br label [[OMP_PRECOND_END]] @@ -2303,128 +2436,127 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 0, i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: -// CHECK11-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[TMP3]], i32 16) ] -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTLINEAR_START]], align 4 +// CHECK11-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[TMP12]], i32 16) ] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTLINEAR_START]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK11-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP12]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP14]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -// CHECK11-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK11-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP21]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP23]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i32 [[TMP23]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP12]], i32 [[TMP32]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK11-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP29]], 0 -// CHECK11-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 -// CHECK11-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 -// CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] -// CHECK11-NEXT: store i32 [[ADD12]], i32* [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK11-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 +// CHECK11-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 +// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] +// CHECK11-NEXT: store i32 [[ADD11]], i32* [[TMP6]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK11-NEXT: br i1 [[TMP40]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK11: .omp.linear.pu: // CHECK11-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK11: .omp.linear.pu.done: @@ -2666,99 +2798,115 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 // CHECK17-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK17-NEXT: store i64 [[I]], i64* [[I_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[I_ADDR]] to i32* -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]], %struct.SS* [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i32* [[CONV]], i32** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP2]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[I1:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 -// CHECK17-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP1]], i32 0, i32 0 +// CHECK17-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load %struct.SS*, %struct.SS** [[TMP3]], align 8 +// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP4]], i32 0, i32 0 // CHECK17-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 0 // CHECK17-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[ARRAYDECAY]], i64 16) ] // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 122 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 122 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]], i32* [[I1]], %struct.SS* [[TMP1]]) +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17: omp.inner.for.body: +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK17-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP17]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i32* [[I]], i32** [[TMP18]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store %struct.SS* [[TMP4]], %struct.SS** [[TMP19]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK17-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK17-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: store i32 123, i32* [[TMP0]], align 4 +// CHECK17-NEXT: store i32 123, i32* [[TMP2]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK17: .omp.final.done: // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -2766,86 +2914,90 @@ // CHECK17-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I2:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 -// CHECK17-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP1]], i32 0, i32 0 +// CHECK17-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load %struct.SS*, %struct.SS** [[TMP7]], align 8 +// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP8]], i32 0, i32 0 // CHECK17-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 0 // CHECK17-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[ARRAYDECAY]], i64 16) ] -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTLINEAR_START]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTLINEAR_START]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK17-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK17-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP11]] to i32 // CHECK17-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK17-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]]) -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 122 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK17-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP13]]) +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP13]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 122 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK17-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I2]], align 4 -// CHECK17-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP1]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[I2]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A5]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK17-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP8]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A4]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK17-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK17-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK17-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP13]]) +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK17-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: store i32 123, i32* [[TMP0]], align 4 +// CHECK17-NEXT: store i32 123, i32* [[TMP6]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK17: .omp.final.done: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK17-NEXT: br i1 [[TMP18]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK17-NEXT: br i1 [[TMP25]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK17: .omp.linear.pu: // CHECK17-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK17: .omp.linear.pu.done: @@ -2920,96 +3072,112 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 // CHECK19-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK19-NEXT: store i32 [[I]], i32* [[I_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[I_ADDR]], %struct.SS* [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32* [[I_ADDR]], i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP2]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[I1:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 4 -// CHECK19-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP1]], i32 0, i32 0 +// CHECK19-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load %struct.SS*, %struct.SS** [[TMP3]], align 4 +// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP4]], i32 0, i32 0 // CHECK19-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 0 // CHECK19-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[ARRAYDECAY]], i32 16) ] // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 122 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 122 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP9]], i32 [[TMP10]], i32* [[I1]], %struct.SS* [[TMP1]]) +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP14]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP15]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32* [[I]], i32** [[TMP16]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store %struct.SS* [[TMP4]], %struct.SS** [[TMP17]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK19-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK19-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: store i32 123, i32* [[TMP0]], align 4 +// CHECK19-NEXT: store i32 123, i32* [[TMP2]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK19: .omp.final.done: // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -3017,83 +3185,87 @@ // CHECK19-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I1:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[I2:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 4 -// CHECK19-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP1]], i32 0, i32 0 +// CHECK19-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load %struct.SS*, %struct.SS** [[TMP7]], align 4 +// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP8]], i32 0, i32 0 // CHECK19-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 0 // CHECK19-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[ARRAYDECAY]], i32 16) ] -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTLINEAR_START]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTLINEAR_START]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK19-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]]) -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP6]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 122 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK19-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP13]]) +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP13]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 122 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK19-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I1]], align 4 -// CHECK19-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP1]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[I1]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A4]], i32 0, i32 [[TMP13]] +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK19-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP8]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A3]], i32 0, i32 [[TMP20]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK19-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK19-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK19-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP13]]) +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK19-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: store i32 123, i32* [[TMP0]], align 4 +// CHECK19-NEXT: store i32 123, i32* [[TMP6]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK19: .omp.final.done: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK19-NEXT: br i1 [[TMP18]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK19-NEXT: br i1 [[TMP25]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK19: .omp.linear.pu: // CHECK19-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK19: .omp.linear.pu.done: @@ -3343,6 +3515,7 @@ // CHECK25-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK25-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK25-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK25-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK25-NEXT: store i64 [[I]], i64* [[I_ADDR]], align 8 @@ -3351,110 +3524,134 @@ // CHECK25-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[I_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]], i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK25-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store i32* [[CONV]], i32** [[TMP2]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK25-NEXT: store i32* [[CONV1]], i32** [[TMP3]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK25-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK25-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: ret void // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK25-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK25-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK25-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 -// CHECK25-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK25-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK25-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK25-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK25-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 -// CHECK25-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK25-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK25-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK25-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK25-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK25-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK25-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK25-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK25-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 +// CHECK25-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK25-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK25-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK25-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK25-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: store i32 0, i32* [[I3]], align 4 -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK25-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK25-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK25-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK25: omp.precond.then: -// CHECK25-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[TMP3]], i64 16) ] +// CHECK25-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[TMP8]], i64 16) ] // CHECK25-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK25-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK25-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK25-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK25-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK25-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +// CHECK25-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK25: cond.true: -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK25-NEXT: br label [[COND_END:%.*]] // CHECK25: cond.false: -// CHECK25-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK25-NEXT: br label [[COND_END]] // CHECK25: cond.end: -// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK25-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK25-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK25-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK25: omp.inner.for.cond: -// CHECK25-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK25-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK25-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK25-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK25-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK25-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32*, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[I4]], i32* [[TMP1]], i64 [[TMP2]], i32* [[TMP3]]) +// CHECK25-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK25-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK25-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK25-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK25-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK25-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP26]], align 8 +// CHECK25-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK25-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP27]], align 8 +// CHECK25-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK25-NEXT: store i32* [[I3]], i32** [[TMP28]], align 8 +// CHECK25-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK25-NEXT: store i32* [[TMP4]], i32** [[TMP29]], align 8 +// CHECK25-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK25-NEXT: store i64 [[TMP6]], i64* [[TMP30]], align 8 +// CHECK25-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK25-NEXT: store i32* [[TMP8]], i32** [[TMP31]], align 8 +// CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK25-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK25-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: -// CHECK25-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK25-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK25-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK25-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK25-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK25-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK25-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK25: .omp.final.then: -// CHECK25-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 -// CHECK25-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 -// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 -// CHECK25-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK25-NEXT: store i32 [[ADD9]], i32* [[TMP0]], align 4 +// CHECK25-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK25-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 +// CHECK25-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] +// CHECK25-NEXT: store i32 [[ADD8]], i32* [[TMP2]], align 4 // CHECK25-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK25: .omp.final.done: // CHECK25-NEXT: br label [[OMP_PRECOND_END]] @@ -3463,131 +3660,130 @@ // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK25-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK25-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK25-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 -// CHECK25-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK25-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[I6:%.*]] = alloca i32, align 4 // CHECK25-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK25-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK25-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 -// CHECK25-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK25-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK25-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK25-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK25-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK25-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK25-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK25-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK25-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 +// CHECK25-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK25-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP9]], align 8 +// CHECK25-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK25-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP11]], align 8 +// CHECK25-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK25-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK25-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK25-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK25-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: store i32 0, i32* [[I3]], align 4 -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK25-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK25-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK25-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK25: omp.precond.then: -// CHECK25-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[TMP3]], i64 16) ] -// CHECK25-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK25-NEXT: store i32 [[TMP7]], i32* [[DOTLINEAR_START]], align 4 +// CHECK25-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[TMP12]], i64 16) ] +// CHECK25-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK25-NEXT: store i32 [[TMP16]], i32* [[DOTLINEAR_START]], align 4 // CHECK25-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK25-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 -// CHECK25-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK25-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK25-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK25-NEXT: [[CONV:%.*]] = trunc i64 [[TMP18]] to i32 +// CHECK25-NEXT: [[TMP19:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK25-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP19]] to i32 // CHECK25-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK25-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK25-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP12]]) -// CHECK25-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP14]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK25-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -// CHECK25-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK25-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK25-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP21]]) +// CHECK25-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP23]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK25-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK25-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK25: cond.true: -// CHECK25-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK25-NEXT: br label [[COND_END:%.*]] // CHECK25: cond.false: -// CHECK25-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK25-NEXT: br label [[COND_END]] // CHECK25: cond.end: -// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] +// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK25-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK25-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK25: omp.inner.for.cond: -// CHECK25-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK25-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK25-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK25-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK25-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK25-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK25-NEXT: [[TMP23:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i64 [[IDXPROM]] +// CHECK25-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 +// CHECK25-NEXT: [[TMP32:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP32]] to i64 +// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP12]], i64 [[IDXPROM]] // CHECK25-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK25: omp.body.continue: // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK25-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK25-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: -// CHECK25-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK25-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK25-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK25-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK25-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK25-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK25-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK25: .omp.final.then: -// CHECK25-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP29]], 0 -// CHECK25-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 -// CHECK25-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 -// CHECK25-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK25-NEXT: store i32 [[ADD13]], i32* [[TMP0]], align 4 +// CHECK25-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK25-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 +// CHECK25-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 +// CHECK25-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK25-NEXT: store i32 [[ADD12]], i32* [[TMP6]], align 4 // CHECK25-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK25: .omp.final.done: -// CHECK25-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK25-NEXT: br i1 [[TMP31]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK25-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK25-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK25-NEXT: br i1 [[TMP40]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK25: .omp.linear.pu: // CHECK25-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK25: .omp.linear.pu.done: @@ -3664,6 +3860,7 @@ // CHECK25-NEXT: [[TE_ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[TH_ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) // CHECK25-NEXT: store i64 [[TE]], i64* [[TE_ADDR]], align 8 // CHECK25-NEXT: store i64 [[TH]], i64* [[TH_ADDR]], align 8 @@ -3674,73 +3871,88 @@ // CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP1]]) +// CHECK25-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store [10 x i32]* [[TMP1]], [10 x i32]** [[TMP4]], align 8 +// CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: ret void // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK25-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK25-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK25-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK25-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK25-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK25-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK25-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK25-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK25-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK25-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK25-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK25: cond.true: // CHECK25-NEXT: br label [[COND_END:%.*]] // CHECK25: cond.false: -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK25-NEXT: br label [[COND_END]] // CHECK25: cond.end: -// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK25-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK25-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK25-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK25: omp.inner.for.cond: -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK25-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 -// CHECK25-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK25-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 +// CHECK25-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK25-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 -// CHECK25-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 // CHECK25-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !11 +// CHECK25-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !11 +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 +// CHECK25-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK25-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !11 +// CHECK25-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !11 +// CHECK25-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK25-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !11 +// CHECK25-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK25-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 8, !llvm.access.group !11 +// CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !11 // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK25-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 -// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK25-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK25-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 +// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK25-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: -// CHECK25-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK25-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK25-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK25-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK25-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK25-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK25-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK25: .omp.final.then: // CHECK25-NEXT: store i32 10, i32* [[I]], align 4 // CHECK25-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3749,13 +3961,11 @@ // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK25-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK25-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK25-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3765,66 +3975,70 @@ // CHECK25-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK25-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK25-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK25-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK25-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK25-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK25-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK25-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK25-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK25-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK25-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK25-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK25-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK25-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK25-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK25-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK25-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK25-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK25-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK25-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK25-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK25-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK25: cond.true: // CHECK25-NEXT: br label [[COND_END:%.*]] // CHECK25: cond.false: -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK25-NEXT: br label [[COND_END]] // CHECK25: cond.end: -// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK25-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK25-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK25: omp.inner.for.cond: -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK25-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 -// CHECK25-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK25-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK25-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK25-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK25-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK25-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK25-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 -// CHECK25-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 -// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK25-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK25-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 // CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK25: omp.body.continue: // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK25-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK25-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK25-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK25-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: -// CHECK25-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK25-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK25-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK25-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK25-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK25-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK25-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK25: .omp.final.then: // CHECK25-NEXT: store i32 10, i32* [[I]], align 4 // CHECK25-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3948,114 +4162,139 @@ // CHECK27-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK27-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK27-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK27-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[I]], i32* [[I_ADDR]], align 4 // CHECK27-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK27-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK27-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK27-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[I_ADDR]], i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK27-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store i32* [[I_ADDR]], i32** [[TMP2]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK27-NEXT: store i32* [[N_ADDR]], i32** [[TMP3]], align 4 +// CHECK27-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK27-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK27-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK27-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK27-NEXT: ret void // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK27-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK27-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK27-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 4 -// CHECK27-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK27-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK27-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 4 -// CHECK27-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK27-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK27-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK27-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK27-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK27-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK27-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK27-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 4 +// CHECK27-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK27-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK27-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK27-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK27-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: store i32 0, i32* [[I3]], align 4 -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK27-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK27-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK27-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK27: omp.precond.then: -// CHECK27-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[TMP3]], i32 16) ] +// CHECK27-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[TMP8]], i32 16) ] // CHECK27-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK27-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK27-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK27-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK27-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK27-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK27-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +// CHECK27-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK27: cond.true: -// CHECK27-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK27-NEXT: br label [[COND_END:%.*]] // CHECK27: cond.false: -// CHECK27-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK27-NEXT: br label [[COND_END]] // CHECK27: cond.end: -// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK27-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK27: omp.inner.for.cond: -// CHECK27-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK27-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK27-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK27-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK27-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32*, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[I4]], i32* [[TMP1]], i32 [[TMP2]], i32* [[TMP3]]) +// CHECK27-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK27-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK27-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP24]], align 4 +// CHECK27-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK27-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP25]], align 4 +// CHECK27-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK27-NEXT: store i32* [[I3]], i32** [[TMP26]], align 4 +// CHECK27-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK27-NEXT: store i32* [[TMP4]], i32** [[TMP27]], align 4 +// CHECK27-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK27-NEXT: store i32 [[TMP6]], i32* [[TMP28]], align 4 +// CHECK27-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK27-NEXT: store i32* [[TMP8]], i32** [[TMP29]], align 4 +// CHECK27-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK27-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK27-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: -// CHECK27-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK27-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK27-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK27-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK27-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK27-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK27-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK27: .omp.final.then: -// CHECK27-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK27-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 -// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 -// CHECK27-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK27-NEXT: store i32 [[ADD9]], i32* [[TMP0]], align 4 +// CHECK27-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK27-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 +// CHECK27-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] +// CHECK27-NEXT: store i32 [[ADD8]], i32* [[TMP2]], align 4 // CHECK27-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK27: .omp.final.done: // CHECK27-NEXT: br label [[OMP_PRECOND_END]] @@ -4064,128 +4303,127 @@ // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK27-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK27-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK27-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 4 -// CHECK27-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK27-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK27-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK27-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 4 -// CHECK27-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK27-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK27-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK27-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK27-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK27-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK27-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK27-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 4 +// CHECK27-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK27-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK27-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP11]], align 4 +// CHECK27-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK27-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK27-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK27-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK27-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: store i32 0, i32* [[I3]], align 4 -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK27-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK27-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK27-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK27: omp.precond.then: -// CHECK27-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[TMP3]], i32 16) ] -// CHECK27-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK27-NEXT: store i32 [[TMP7]], i32* [[DOTLINEAR_START]], align 4 +// CHECK27-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[TMP12]], i32 16) ] +// CHECK27-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK27-NEXT: store i32 [[TMP16]], i32* [[DOTLINEAR_START]], align 4 // CHECK27-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK27-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK27-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_UB]], align 4 // CHECK27-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK27-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK27-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP12]]) -// CHECK27-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP14]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK27-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -// CHECK27-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK27-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK27-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP21]]) +// CHECK27-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP23]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK27-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK27-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK27: cond.true: -// CHECK27-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK27-NEXT: br label [[COND_END:%.*]] // CHECK27: cond.false: -// CHECK27-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK27-NEXT: br label [[COND_END]] // CHECK27: cond.end: -// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] +// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK27-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK27: omp.inner.for.cond: -// CHECK27-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK27-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK27-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK27-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK27-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK27-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK27-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i32 [[TMP23]] +// CHECK27-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// CHECK27-NEXT: [[TMP32:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP12]], i32 [[TMP32]] // CHECK27-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK27: omp.body.continue: // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK27-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK27-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: -// CHECK27-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK27-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK27-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK27-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) +// CHECK27-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK27-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK27-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK27: .omp.final.then: -// CHECK27-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP29]], 0 -// CHECK27-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 -// CHECK27-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 -// CHECK27-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] -// CHECK27-NEXT: store i32 [[ADD12]], i32* [[TMP0]], align 4 +// CHECK27-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK27-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 +// CHECK27-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 +// CHECK27-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] +// CHECK27-NEXT: store i32 [[ADD11]], i32* [[TMP6]], align 4 // CHECK27-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK27: .omp.final.done: -// CHECK27-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK27-NEXT: br i1 [[TMP31]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK27-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK27-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK27-NEXT: br i1 [[TMP40]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK27: .omp.linear.pu: // CHECK27-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK27: .omp.linear.pu.done: @@ -4260,6 +4498,7 @@ // CHECK27-NEXT: [[TE_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TH_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK27-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) // CHECK27-NEXT: store i32 [[TE]], i32* [[TE_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TH]], i32* [[TH_ADDR]], align 4 @@ -4268,71 +4507,86 @@ // CHECK27-NEXT: [[TMP2:%.*]] = load i32, i32* [[TE_ADDR]], align 4 // CHECK27-NEXT: [[TMP3:%.*]] = load i32, i32* [[TH_ADDR]], align 4 // CHECK27-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK27-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP1]]) +// CHECK27-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store [10 x i32]* [[TMP1]], [10 x i32]** [[TMP4]], align 4 +// CHECK27-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK27-NEXT: ret void // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK27-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK27-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK27-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK27-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK27-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK27-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK27-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK27-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK27-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK27-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK27-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK27-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK27: cond.true: // CHECK27-NEXT: br label [[COND_END:%.*]] // CHECK27: cond.false: -// CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK27-NEXT: br label [[COND_END]] // CHECK27: cond.end: -// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK27-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK27: omp.inner.for.cond: -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK27-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 -// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK27-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK27-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK27-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 -// CHECK27-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 -// CHECK27-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !12 +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK27-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !12 +// CHECK27-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK27-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !12 +// CHECK27-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4, !llvm.access.group !12 +// CHECK27-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK27-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4, !llvm.access.group !12 +// CHECK27-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK27-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP14]], align 4, !llvm.access.group !12 +// CHECK27-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !12 // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK27-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 -// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK27-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK27-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 +// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK27-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: -// CHECK27-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK27-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK27-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK27-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK27-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK27-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK27-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK27: .omp.final.then: // CHECK27-NEXT: store i32 10, i32* [[I]], align 4 // CHECK27-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4341,13 +4595,11 @@ // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK27-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK27-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK27-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4357,63 +4609,67 @@ // CHECK27-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK27-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK27-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK27-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK27-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK27-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK27-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK27-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK27-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK27-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK27-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK27-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK27-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK27-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK27-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK27-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK27: cond.true: // CHECK27-NEXT: br label [[COND_END:%.*]] // CHECK27: cond.false: -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK27-NEXT: br label [[COND_END]] // CHECK27: cond.end: -// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK27-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK27: omp.inner.for.cond: -// CHECK27-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK27-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 -// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK27-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK27-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 +// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK27-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK27-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK27-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !16 -// CHECK27-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !16 -// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK27-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !16 +// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP17]] // CHECK27-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 // CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK27: omp.body.continue: // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK27-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK27-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: -// CHECK27-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK27-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK27-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK27-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK27-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK27-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK27-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK27: .omp.final.then: // CHECK27-NEXT: store i32 10, i32* [[I]], align 4 // CHECK27-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp @@ -156,18 +156,21 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -175,58 +178,71 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 56087, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !4 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: store i32 456, i32* [[J]], align 4 @@ -236,13 +252,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -254,79 +268,83 @@ // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 56087, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 456 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 456 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP18]], 456 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 456 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL5]] // CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] // CHECK1-NEXT: store i32 [[ADD7]], i32* [[J]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], [123 x [456 x i32]]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP20]] to i64 // CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [456 x i32], [456 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX9]], align 4, !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: store i32 456, i32* [[J]], align 4 @@ -391,18 +409,21 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -410,56 +431,69 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 56087, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !5 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP14]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !5 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, i32* [[I]], align 4 // CHECK3-NEXT: store i32 456, i32* [[J]], align 4 @@ -469,13 +503,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -487,75 +519,79 @@ // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 56087, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 456 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP18]], 456 // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL4]] // CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK3-NEXT: store i32 [[ADD6]], i32* [[J]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], [123 x [456 x i32]]* [[A]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], [456 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP14]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], [123 x [456 x i32]]* [[A]], i32 0, i32 [[TMP19]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], [456 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP20]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX7]], align 4, !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, i32* [[I]], align 4 // CHECK3-NEXT: store i32 456, i32* [[J]], align 4 @@ -847,6 +883,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[M]], i64* [[M_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -857,133 +894,160 @@ // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i64, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]], i32* [[CONV3]], i64 [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32* [[CONV3]], i32** [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP1]], i64* [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32* [[TMP2]], i32** [[TMP7]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[M]], i32** [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[M_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 // CHECK9-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: store i64 [[TMP17]], i64* [[DOTOMP_COMB_UB]], align 8 // CHECK9-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP18]], i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP24]], i64* [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32*, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP21]], i64 [[TMP22]], i32* [[TMP0]], i32* [[TMP1]], i64 [[TMP2]], i64 [[TMP3]], i32* [[TMP4]]), !llvm.access.group !5 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: store i64 [[TMP27]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: store i64 [[TMP28]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP29]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP30]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP2]], i32** [[TMP31]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32* [[TMP4]], i32** [[TMP32]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i64 [[TMP6]], i64* [[TMP33]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store i64 [[TMP8]], i64* [[TMP34]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: store i32* [[TMP10]], i32** [[TMP35]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !5 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP37:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP36]], [[TMP37]] // CHECK9-NEXT: store i64 [[ADD]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK9-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP39]]) +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK9-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP29]], 0 -// CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK9-NEXT: [[MUL17:%.*]] = mul nsw i32 [[DIV16]], 1 -// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 0, [[MUL17]] -// CHECK9-NEXT: store i32 [[ADD18]], i32* [[I11]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP30]], 0 -// CHECK9-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 -// CHECK9-NEXT: [[MUL21:%.*]] = mul nsw i32 [[DIV20]], 1 -// CHECK9-NEXT: [[ADD22:%.*]] = add nsw i32 0, [[MUL21]] -// CHECK9-NEXT: store i32 [[ADD22]], i32* [[J12]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP42]], 0 +// CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 +// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] +// CHECK9-NEXT: store i32 [[ADD16]], i32* [[I9]], align 4 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP43]], 0 +// CHECK9-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 +// CHECK9-NEXT: [[MUL19:%.*]] = mul nsw i32 [[DIV18]], 1 +// CHECK9-NEXT: [[ADD20:%.*]] = add nsw i32 0, [[MUL19]] +// CHECK9-NEXT: store i32 [[ADD20]], i32* [[J10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -992,172 +1056,170 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[M]], i32** [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[M_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32*, i32** [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP18]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 // CHECK9-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP19]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP20]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[TMP12]], i64* [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP13]], i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: store i64 [[TMP21]], i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: store i64 [[TMP22]], i64* [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP23]], i64* [[DOTOMP_UB]], align 8 // CHECK9-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP15]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]] -// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP28]], [[COND_TRUE]] ], [ [[TMP29]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP20]], i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP30]], i64* [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP21]], [[TMP22]] -// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP31]], [[TMP32]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK9-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] -// CHECK9-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 -// CHECK9-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP23]], [[CONV18]] -// CHECK9-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] -// CHECK9-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK9-NEXT: store i32 [[CONV21]], i32* [[I11]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP27]], 0 -// CHECK9-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 -// CHECK9-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] -// CHECK9-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 -// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP26]], [[CONV25]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP28]], 0 -// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 -// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] -// CHECK9-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 -// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] -// CHECK9-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP25]], [[MUL31]] -// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 -// CHECK9-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] -// CHECK9-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 -// CHECK9-NEXT: store i32 [[CONV35]], i32* [[J12]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[I11]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK9-NEXT: [[TMP30:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP3]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i64 [[TMP30]] -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[J12]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP31]] to i64 -// CHECK9-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i64 [[IDXPROM36]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX37]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK9-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK9-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP33]], [[CONV16]] +// CHECK9-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK9-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK9-NEXT: store i32 [[CONV19]], i32* [[I9]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP35:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK9-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK9-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP36]], [[CONV23]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK9-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] +// CHECK9-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK9-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP35]], [[MUL29]] +// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 +// CHECK9-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] +// CHECK9-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK9-NEXT: store i32 [[CONV33]], i32* [[J10]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[I9]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP39]] to i64 +// CHECK9-NEXT: [[TMP40:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP12]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP14]], i64 [[TMP40]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, i32* [[J10]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[IDXPROM34:%.*]] = sext i32 [[TMP41]] to i64 +// CHECK9-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i64 [[IDXPROM34]] +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX35]], align 4, !llvm.access.group !9 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP32]], 1 -// CHECK9-NEXT: store i64 [[ADD38]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 [[TMP42]], 1 +// CHECK9-NEXT: store i64 [[ADD36]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK9-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP44]]) +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK9-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB39:%.*]] = sub nsw i32 [[TMP37]], 0 -// CHECK9-NEXT: [[DIV40:%.*]] = sdiv i32 [[SUB39]], 1 -// CHECK9-NEXT: [[MUL41:%.*]] = mul nsw i32 [[DIV40]], 1 -// CHECK9-NEXT: [[ADD42:%.*]] = add nsw i32 0, [[MUL41]] -// CHECK9-NEXT: store i32 [[ADD42]], i32* [[I11]], align 4 -// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB43:%.*]] = sub nsw i32 [[TMP38]], 0 -// CHECK9-NEXT: [[DIV44:%.*]] = sdiv i32 [[SUB43]], 1 -// CHECK9-NEXT: [[MUL45:%.*]] = mul nsw i32 [[DIV44]], 1 -// CHECK9-NEXT: [[ADD46:%.*]] = add nsw i32 0, [[MUL45]] -// CHECK9-NEXT: store i32 [[ADD46]], i32* [[J12]], align 4 +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB37:%.*]] = sub nsw i32 [[TMP47]], 0 +// CHECK9-NEXT: [[DIV38:%.*]] = sdiv i32 [[SUB37]], 1 +// CHECK9-NEXT: [[MUL39:%.*]] = mul nsw i32 [[DIV38]], 1 +// CHECK9-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL39]] +// CHECK9-NEXT: store i32 [[ADD40]], i32* [[I9]], align 4 +// CHECK9-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB41:%.*]] = sub nsw i32 [[TMP48]], 0 +// CHECK9-NEXT: [[DIV42:%.*]] = sdiv i32 [[SUB41]], 1 +// CHECK9-NEXT: [[MUL43:%.*]] = mul nsw i32 [[DIV42]], 1 +// CHECK9-NEXT: [[ADD44:%.*]] = add nsw i32 0, [[MUL43]] +// CHECK9-NEXT: store i32 [[ADD44]], i32* [[J10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -1201,18 +1263,21 @@ // CHECK9-SAME: ([10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x [2 x i32]]*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [10 x [2 x i32]]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [10 x [2 x i32]]* [[TMP0]], [10 x [2 x i32]]** [[TMP1]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1220,58 +1285,71 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 19, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x [2 x i32]]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x [2 x i32]]* [[TMP0]]), !llvm.access.group !14 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [10 x [2 x i32]]* [[TMP2]], [10 x [2 x i32]]** [[TMP16]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !14 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, i32* [[I]], align 4 // CHECK9-NEXT: store i32 2, i32* [[J]], align 4 @@ -1281,13 +1359,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1299,78 +1375,82 @@ // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 19, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 2 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 2 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP18]], 2 // CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 2 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL5]] // CHECK9-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] // CHECK9-NEXT: store i32 [[ADD7]], i32* [[J]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP20]] to i64 // CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX9]], align 4, !llvm.access.group !17 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, i32* [[I]], align 4 // CHECK9-NEXT: store i32 2, i32* [[J]], align 4 @@ -1519,6 +1599,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -1527,135 +1608,162 @@ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32, i32, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32* [[M_ADDR]], i32 [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[M_ADDR]], i32** [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], i32* [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32* [[TMP2]], i32** [[TMP7]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J10:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[M]], i32** [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[M_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 // CHECK11-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK11-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP17]], i64* [[DOTOMP_COMB_UB]], align 8 // CHECK11-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP18]], i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP24]], i64* [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i32 -// CHECK11-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP24:%.*]] = trunc i64 [[TMP23]] to i32 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32*, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP22]], i32 [[TMP24]], i32* [[TMP0]], i32* [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], i32* [[TMP4]]), !llvm.access.group !6 +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32 +// CHECK11-NEXT: store i32 [[TMP28]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP30:%.*]] = trunc i64 [[TMP29]] to i32 +// CHECK11-NEXT: store i32 [[TMP30]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP31]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP32]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP2]], i32** [[TMP33]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32* [[TMP4]], i32** [[TMP34]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[TMP35]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[TMP36]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store i32* [[TMP10]], i32** [[TMP37]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !6 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP39:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP38]], [[TMP39]] // CHECK11-NEXT: store i64 [[ADD]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK11-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP41]]) +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 +// CHECK11-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP31]], 0 -// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK11-NEXT: [[MUL17:%.*]] = mul nsw i32 [[DIV16]], 1 -// CHECK11-NEXT: [[ADD18:%.*]] = add nsw i32 0, [[MUL17]] -// CHECK11-NEXT: store i32 [[ADD18]], i32* [[I11]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP32]], 0 -// CHECK11-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 -// CHECK11-NEXT: [[MUL21:%.*]] = mul nsw i32 [[DIV20]], 1 -// CHECK11-NEXT: [[ADD22:%.*]] = add nsw i32 0, [[MUL21]] -// CHECK11-NEXT: store i32 [[ADD22]], i32* [[J12]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK11-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK11-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 +// CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] +// CHECK11-NEXT: store i32 [[ADD16]], i32* [[I9]], align 4 +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP45]], 0 +// CHECK11-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 +// CHECK11-NEXT: [[MUL19:%.*]] = mul nsw i32 [[DIV18]], 1 +// CHECK11-NEXT: [[ADD20:%.*]] = add nsw i32 0, [[MUL19]] +// CHECK11-NEXT: store i32 [[ADD20]], i32* [[J10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: br label [[OMP_PRECOND_END]] @@ -1664,172 +1772,170 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I13:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J14:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J12:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[M]], i32** [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[M_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP18]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 // CHECK11-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP19]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP20]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[CONV11:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CONV12:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK11-NEXT: store i64 [[CONV11]], i64* [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[CONV12]], i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP21]], i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[CONV9:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[CONV10:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK11-NEXT: store i64 [[CONV9]], i64* [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[CONV10]], i64* [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP15]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]] -// CHECK11-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] +// CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP28]], [[COND_TRUE]] ], [ [[TMP29]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP20]], i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP30]], i64* [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !10 -// CHECK11-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP21]], [[TMP22]] -// CHECK11-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !10 +// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP31]], [[TMP32]] +// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK11-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 -// CHECK11-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] -// CHECK11-NEXT: [[CONV20:%.*]] = sext i32 [[MUL19]] to i64 -// CHECK11-NEXT: [[DIV21:%.*]] = sdiv i64 [[TMP23]], [[CONV20]] -// CHECK11-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]] -// CHECK11-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK11-NEXT: store i32 [[CONV23]], i32* [[I13]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP27]], 0 -// CHECK11-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 -// CHECK11-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] -// CHECK11-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 -// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP26]], [[CONV27]] -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP28]], 0 -// CHECK11-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 -// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] -// CHECK11-NEXT: [[CONV32:%.*]] = sext i32 [[MUL31]] to i64 -// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[DIV28]], [[CONV32]] -// CHECK11-NEXT: [[SUB34:%.*]] = sub nsw i64 [[TMP25]], [[MUL33]] -// CHECK11-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 -// CHECK11-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] -// CHECK11-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 -// CHECK11-NEXT: store i32 [[CONV37]], i32* [[J14]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[I13]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP30:%.*]] = mul nsw i32 [[TMP29]], [[TMP3]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 [[TMP30]] -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[J14]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[ARRAYIDX38:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i32 [[TMP31]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX38]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK11-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] +// CHECK11-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 +// CHECK11-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP33]], [[CONV18]] +// CHECK11-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] +// CHECK11-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK11-NEXT: store i32 [[CONV21]], i32* [[I11]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP35:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK11-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 +// CHECK11-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] +// CHECK11-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 +// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP36]], [[CONV25]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 +// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] +// CHECK11-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 +// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] +// CHECK11-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP35]], [[MUL31]] +// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 +// CHECK11-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] +// CHECK11-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 +// CHECK11-NEXT: store i32 [[CONV35]], i32* [[J12]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[I11]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP40:%.*]] = mul nsw i32 [[TMP39]], [[TMP12]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP14]], i32 [[TMP40]] +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[J12]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i32 [[TMP41]] +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX36]], align 4, !llvm.access.group !10 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK11-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP32]], 1 -// CHECK11-NEXT: store i64 [[ADD39]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK11-NEXT: [[ADD37:%.*]] = add nsw i64 [[TMP42]], 1 +// CHECK11-NEXT: store i64 [[ADD37]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) -// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK11-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP44]]) +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK11-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB40:%.*]] = sub nsw i32 [[TMP37]], 0 -// CHECK11-NEXT: [[DIV41:%.*]] = sdiv i32 [[SUB40]], 1 -// CHECK11-NEXT: [[MUL42:%.*]] = mul nsw i32 [[DIV41]], 1 -// CHECK11-NEXT: [[ADD43:%.*]] = add nsw i32 0, [[MUL42]] -// CHECK11-NEXT: store i32 [[ADD43]], i32* [[I13]], align 4 -// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB44:%.*]] = sub nsw i32 [[TMP38]], 0 -// CHECK11-NEXT: [[DIV45:%.*]] = sdiv i32 [[SUB44]], 1 -// CHECK11-NEXT: [[MUL46:%.*]] = mul nsw i32 [[DIV45]], 1 -// CHECK11-NEXT: [[ADD47:%.*]] = add nsw i32 0, [[MUL46]] -// CHECK11-NEXT: store i32 [[ADD47]], i32* [[J14]], align 4 +// CHECK11-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB38:%.*]] = sub nsw i32 [[TMP47]], 0 +// CHECK11-NEXT: [[DIV39:%.*]] = sdiv i32 [[SUB38]], 1 +// CHECK11-NEXT: [[MUL40:%.*]] = mul nsw i32 [[DIV39]], 1 +// CHECK11-NEXT: [[ADD41:%.*]] = add nsw i32 0, [[MUL40]] +// CHECK11-NEXT: store i32 [[ADD41]], i32* [[I11]], align 4 +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB42:%.*]] = sub nsw i32 [[TMP48]], 0 +// CHECK11-NEXT: [[DIV43:%.*]] = sdiv i32 [[SUB42]], 1 +// CHECK11-NEXT: [[MUL44:%.*]] = mul nsw i32 [[DIV43]], 1 +// CHECK11-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL44]] +// CHECK11-NEXT: store i32 [[ADD45]], i32* [[J12]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: br label [[OMP_PRECOND_END]] @@ -1873,18 +1979,21 @@ // CHECK11-SAME: ([10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x [2 x i32]]*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [10 x [2 x i32]]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [10 x [2 x i32]]* [[TMP0]], [10 x [2 x i32]]** [[TMP1]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1892,56 +2001,69 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 19, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x [2 x i32]]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x [2 x i32]]* [[TMP0]]), !llvm.access.group !15 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [10 x [2 x i32]]* [[TMP2]], [10 x [2 x i32]]** [[TMP14]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !15 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK11-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, i32* [[I]], align 4 // CHECK11-NEXT: store i32 2, i32* [[J]], align 4 @@ -1951,13 +2073,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1969,74 +2089,78 @@ // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 19, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 2 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP18]], 2 // CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL4]] // CHECK11-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK11-NEXT: store i32 [[ADD6]], i32* [[J]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP14]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP6]], i32 0, i32 [[TMP19]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP20]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX7]], align 4, !llvm.access.group !18 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, i32* [[I]], align 4 // CHECK11-NEXT: store i32 2, i32* [[J]], align 4 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp @@ -239,75 +239,91 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !6 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 8, !llvm.access.group !6 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !6 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -316,13 +332,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -332,67 +346,71 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -404,75 +422,91 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !15 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -481,13 +515,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -497,67 +529,71 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -569,95 +605,111 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 123 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], %struct.SS* [[TMP0]]), !llvm.access.group !21 +// CHECK1-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i64 [[TMP12]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP13]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP14]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP15]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !21 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], 122 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 122 // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK1: cond.true5: // CHECK1-NEXT: br label [[COND_END7:%.*]] // CHECK1: cond.false6: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 // CHECK1-NEXT: br label [[COND_END7]] // CHECK1: cond.end7: -// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP18]], [[COND_FALSE6]] ] +// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK1-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -666,13 +718,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -682,67 +732,71 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -850,73 +904,89 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !7 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !7 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP14]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !7 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !7 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -925,13 +995,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -941,64 +1009,68 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1010,73 +1082,89 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !16 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP14]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !16 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1085,13 +1173,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1101,64 +1187,68 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1170,93 +1260,109 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 123 // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]], %struct.SS* [[TMP0]]), !llvm.access.group !22 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP11]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP12]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP13]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !22 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], 122 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 122 // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK3: cond.true5: // CHECK3-NEXT: br label [[COND_END7:%.*]] // CHECK3: cond.false6: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK3-NEXT: br label [[COND_END7]] // CHECK3: cond.end7: -// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP16]], [[COND_FALSE6]] ] +// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ] // CHECK3-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1265,13 +1371,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1281,64 +1385,68 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1835,24 +1943,29 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1862,81 +1975,98 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !9 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP24]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP25]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[TMP27]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32* [[TMP6]], i32** [[TMP28]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !9 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK9-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -1949,15 +2079,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1970,90 +2096,94 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK9-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -2071,24 +2201,29 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2098,81 +2233,98 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 -// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 -// CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !18 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP24]], align 8, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP25]], align 8, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 8, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[TMP27]], align 8, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32* [[TMP6]], i32** [[TMP28]], align 8, !llvm.access.group !18 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !18 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK9-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -2185,15 +2337,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2206,90 +2354,94 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !21 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !21 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK9-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -2309,7 +2461,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store i64 [[M]], i64* [[M_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -2320,23 +2472,26 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2346,117 +2501,133 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !24 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]]), !llvm.access.group !24 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK9-NEXT: store i64 [[TMP24]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP27]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP28]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP2]], i32** [[TMP29]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP4]], i64* [[TMP30]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32* [[TMP6]], i32** [[TMP31]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: store i32 [[TMP33]], i32* [[TMP32]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !24 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] -// CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] -// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] -// CHECK9: cond.true12: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: br label [[COND_END14:%.*]] -// CHECK9: cond.false13: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: br label [[COND_END14]] -// CHECK9: cond.end14: -// CHECK9-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE12]] ], [ [[TMP32]], [[COND_FALSE13]] ] -// CHECK9-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] +// CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] +// CHECK9-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] +// CHECK9: cond.true11: +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: br label [[COND_END13:%.*]] +// CHECK9: cond.false12: +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: br label [[COND_END13]] +// CHECK9: cond.end13: +// CHECK9-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE11]] ], [ [[TMP43]], [[COND_FALSE12]] ] +// CHECK9-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: store i32 [[TMP44]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) -// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 -// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP46]]) +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0 +// CHECK9-NEXT: br i1 [[TMP48]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB16:%.*]] = sub nsw i32 [[TMP38]], 0 -// CHECK9-NEXT: [[DIV17:%.*]] = sdiv i32 [[SUB16]], 1 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV17]], 1 -// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD18]], i32* [[I4]], align 4 +// CHECK9-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP49]], 0 +// CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 +// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] +// CHECK9-NEXT: store i32 [[ADD17]], i32* [[I4]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -2465,16 +2636,12 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2484,99 +2651,104 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 8 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 +// CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK9-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 -// CHECK9-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 -// CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK9-NEXT: store i32 [[ADD13]], i32* [[I6]], align 4 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP35]], 0 +// CHECK9-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 +// CHECK9-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK9-NEXT: store i32 [[ADD12]], i32* [[I5]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -2678,75 +2850,91 @@ // CHECK9-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !30 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 8, !llvm.access.group !30 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !30 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2755,13 +2943,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2771,66 +2957,70 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !33 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2842,75 +3032,91 @@ // CHECK9-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !36 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 8, !llvm.access.group !36 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !36 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2919,13 +3125,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2935,66 +3139,70 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !39 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !39 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3008,113 +3216,126 @@ // CHECK9-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store i64 [[M]], i64* [[M_ADDR]], align 8 // CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK9-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP5]]) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP11]], 10 // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !42 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !42 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK9-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP16]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP17]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: store i32 [[TMP20]], i32* [[TMP19]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.12*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !42 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP20]], 9 -// CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] -// CHECK9: cond.true6: -// CHECK9-NEXT: br label [[COND_END8:%.*]] -// CHECK9: cond.false7: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: br label [[COND_END8]] -// CHECK9: cond.end8: -// CHECK9-NEXT: [[COND9:%.*]] = phi i32 [ 9, [[COND_TRUE6]] ], [ [[TMP21]], [[COND_FALSE7]] ] -// CHECK9-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP27]], 9 +// CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK9: cond.true5: +// CHECK9-NEXT: br label [[COND_END7:%.*]] +// CHECK9: cond.false6: +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: br label [[COND_END7]] +// CHECK9: cond.end7: +// CHECK9-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP28]], [[COND_FALSE6]] ] +// CHECK9-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: store i32 [[TMP29]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK9-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3123,14 +3344,12 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3140,68 +3359,73 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK9-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !45 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3449,23 +3673,28 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3475,79 +3704,96 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !10 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP22]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP23]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP2]], i32** [[TMP24]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[TMP25]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32* [[TMP6]], i32** [[TMP26]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !10 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -3560,15 +3806,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3581,87 +3823,91 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP27]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -3679,23 +3925,28 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3705,79 +3956,96 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !19 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP22]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP23]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP2]], i32** [[TMP24]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[TMP25]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32* [[TMP6]], i32** [[TMP26]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !19 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -3790,15 +4058,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3811,87 +4075,91 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP27]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -3911,7 +4179,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -3920,22 +4188,26 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -3945,109 +4217,127 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]]), !llvm.access.group !25 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: store i32 [[TMP24]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP25]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP26]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP2]], i32** [[TMP27]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[TMP28]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32* [[TMP6]], i32** [[TMP29]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: store i32 [[TMP31]], i32* [[TMP30]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !25 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] // CHECK11-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK11: cond.true11: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 // CHECK11-NEXT: br label [[COND_END13:%.*]] // CHECK11: cond.false12: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK11-NEXT: br label [[COND_END13]] // CHECK11: cond.end13: -// CHECK11-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE11]] ], [ [[TMP30]], [[COND_FALSE12]] ] +// CHECK11-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE11]] ], [ [[TMP41]], [[COND_FALSE12]] ] // CHECK11-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: store i32 [[TMP42]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP44]]) +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK11-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK11-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP47]], 0 // CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] @@ -4060,16 +4350,12 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -4082,88 +4368,94 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !28 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP29]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK11-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK11-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -4268,73 +4560,89 @@ // CHECK11-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !31 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP14]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !31 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK11-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4343,13 +4651,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4359,63 +4665,67 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !34 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !34 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !34 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP17]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !34 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4427,73 +4737,89 @@ // CHECK11-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !37 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !37 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP14]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !37 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !37 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK11-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4502,13 +4828,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4518,63 +4842,67 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !40 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !40 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !40 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !40 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !40 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP17]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !40 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4588,107 +4916,123 @@ // CHECK11-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK11-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 // CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP5]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP11]], 10 // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !43 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP14]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP15]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[TMP17]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !43 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP25]], 9 // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK11: cond.true5: // CHECK11-NEXT: br label [[COND_END7:%.*]] // CHECK11: cond.false6: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 // CHECK11-NEXT: br label [[COND_END7]] // CHECK11: cond.end7: -// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ] +// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP26]], [[COND_FALSE6]] ] // CHECK11-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK11-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4697,14 +5041,12 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4714,64 +5056,70 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !46 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !46 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !46 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP19]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !46 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp @@ -373,8 +373,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 @@ -385,149 +384,166 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i64 [[TMP4]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP2]], i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S]* [[TMP6]] to %struct.S* +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP8]], %struct.St* noundef [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !5 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], [2 x i32]* [[VEC2]], i64 [[TMP19]], [2 x %struct.S]* [[S_ARR3]], %struct.S* [[VAR5]], i64 [[TMP21]]), !llvm.access.group !5 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP26]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP27]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP28]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: store i32 [[TMP30]], i32* [[TMP29]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP31]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[TMP32]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: store i32 [[TMP34]], i32* [[TMP33]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK1-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK1-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN4]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP28]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP41]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done11: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done5: // CHECK1-NEXT: ret void // // @@ -565,151 +581,153 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP8:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 8 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP16]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP17]], i8* align 4 [[TMP18]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP19:%.*]] = bitcast [2 x %struct.S]* [[TMP10]] to %struct.S* +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP20]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP19]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done6: -// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP8]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR7]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* noundef [[AGG_TMP8]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP8]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP20]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP12]], %struct.St* noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP22]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP23]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK1-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX11]] to i8* -// CHECK1-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR7]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false), !llvm.access.group !9 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP29]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[ARRAYIDX6]] to i8* +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i64 4, i1 false), !llvm.access.group !9 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP34]] +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[SIVAR]], align 4, !llvm.access.group !9 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK1-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN14]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN9]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP29]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP41]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done15: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done10: // CHECK1-NEXT: ret void // // @@ -717,35 +735,35 @@ // CHECK1-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.2*, align 8 // CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x i8*], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x i8*], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x i8*], align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) -// CHECK1-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP1]], %struct.S.0** [[TMP]], align 8 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) +// CHECK1-NEXT: store %struct.S.2* [[TEST]], %struct.S.2** [[VAR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S.2*, %struct.S.2** [[VAR]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[TMP1]], %struct.S.2** [[TMP]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_VAR]], align 4 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP2]], i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64* // CHECK1-NEXT: store i64 [[TMP3]], i64* [[TMP8]], align 8 @@ -763,19 +781,19 @@ // CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 // CHECK1-NEXT: store i8* null, i8** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.0]** -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.2]** +// CHECK1-NEXT: store [2 x %struct.S.2]* [[S_ARR]], [2 x %struct.S.2]** [[TMP18]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to [2 x %struct.S.0]** -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to [2 x %struct.S.2]** +// CHECK1-NEXT: store [2 x %struct.S.2]* [[S_ARR]], [2 x %struct.S.2]** [[TMP20]], align 8 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 // CHECK1-NEXT: store i8* null, i8** [[TMP21]], align 8 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.0** -// CHECK1-NEXT: store %struct.S.0* [[TMP5]], %struct.S.0** [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.2** +// CHECK1-NEXT: store %struct.S.2* [[TMP5]], %struct.S.2** [[TMP23]], align 8 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to %struct.S.0** -// CHECK1-NEXT: store %struct.S.0* [[TMP6]], %struct.S.0** [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to %struct.S.2** +// CHECK1-NEXT: store %struct.S.2* [[TMP6]], %struct.S.2** [[TMP25]], align 8 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 // CHECK1-NEXT: store i8* null, i8** [[TMP26]], align 8 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 @@ -785,21 +803,21 @@ // CHECK1-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 // CHECK1-NEXT: br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56(i64 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP4]]) #[[ATTR2]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56(i64 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.2]* [[S_ARR]], %struct.S.2* [[TMP4]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done2: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK1-NEXT: ret i32 [[TMP32]] // @@ -847,379 +865,399 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) +// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 -// CHECK1-SAME: (i64 noundef [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i64 noundef [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.2]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.2]*, align 8 +// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store [2 x %struct.S.2]* [[S_ARR]], [2 x %struct.S.2]** [[S_ARR_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[VAR]], %struct.S.2** [[VAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i64 [[TMP4]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP5]]) +// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.2]*, [2 x %struct.S.2]** [[S_ARR_ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.2*, %struct.S.2** [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[TMP2]], %struct.S.2** [[TMP]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x %struct.S.2]* [[TMP1]], [2 x %struct.S.2]** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[TMP8]], %struct.S.2** [[TMP7]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca %struct.S.2*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.2]*, [2 x %struct.S.2]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP7]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[TMP8]], %struct.S.2** [[TMP]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP9]], i8* align 4 [[TMP10]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [2 x %struct.S.2]* [[TMP6]] to %struct.S.2* +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.2* [[ARRAY_BEGIN]], [[TMP12]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP11]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.2* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.St* noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.2* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: [[TMP13:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[TMP13]], %struct.St* noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: store %struct.S.2* [[VAR]], %struct.S.2** [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP21:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8, !llvm.access.group !14 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], [2 x i32]* [[VEC2]], i64 [[TMP20]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP21]]), !llvm.access.group !14 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK1-NEXT: store i64 [[TMP22]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK1-NEXT: store i64 [[TMP24]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP25]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP26]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP27]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store [2 x %struct.S.2]* [[S_ARR]], [2 x %struct.S.2]** [[TMP30]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP32:%.*]] = load %struct.S.2*, %struct.S.2** [[_TMP4]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: store %struct.S.2* [[TMP32]], %struct.S.2** [[TMP31]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !14 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK1-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK1-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN10]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN6]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP28]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done11: +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP39]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done7: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1ERKS0_2St -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC2ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP0]], %struct.St* noundef [[T]]) +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[S]], %struct.S.2** [[S_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S.2*, %struct.S.2** [[S_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC2ERKS0_2St(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[TMP0]], %struct.St* noundef [[T]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP8:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP9:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP4:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP5:%.*]] = alloca %struct.S.2*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load [2 x %struct.S.2]*, [2 x %struct.S.2]** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP11]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[TMP12]], %struct.S.2** [[TMP]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP15]], i8* align 4 [[TMP16]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast [2 x %struct.S.2]* [[TMP10]] to %struct.S.2* +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.2* [[ARRAY_BEGIN]], [[TMP18]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP17]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.2* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done6: -// CHECK1-NEXT: [[TMP9:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP8]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR7]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP9]], %struct.St* noundef [[AGG_TMP8]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP8]]) #[[ATTR2]] -// CHECK1-NEXT: store %struct.S.0* [[VAR7]], %struct.S.0** [[_TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], %struct.S.2* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.2* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP18]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done3: +// CHECK1-NEXT: [[TMP19:%.*]] = load %struct.S.2*, %struct.S.2** [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[TMP19]], %struct.St* noundef [[AGG_TMP4]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) #[[ATTR2]] +// CHECK1-NEXT: store %struct.S.2* [[VAR]], %struct.S.2** [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP21]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP22]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP9]], align 8, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i64 0, i64 [[IDXPROM11]] -// CHECK1-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX12]] to i8* -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false), !llvm.access.group !17 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP28]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP30:%.*]] = load %struct.S.2*, %struct.S.2** [[_TMP5]], align 8, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i64 0, i64 [[IDXPROM7]] +// CHECK1-NEXT: [[TMP32:%.*]] = bitcast %struct.S.2* [[ARRAYIDX8]] to i8* +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast %struct.S.2* [[TMP30]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i64 4, i1 false), !llvm.access.group !17 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP34]], 1 +// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK1-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN14]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.2], [2 x %struct.S.2]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAY_BEGIN10]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP29]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done15: +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.2* [ [[TMP39]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.2* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done11: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], %struct.S.2* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load volatile i32, i32* @g, align 4 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], %struct.S.2* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load volatile i32, i32* @g, align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1228,16 +1266,16 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2ERKS0_2St -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.2* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 -// CHECK1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.2* [[S]], %struct.S.2** [[S_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], %struct.S.2* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S.2*, %struct.S.2** [[S_ADDR]], align 8 +// CHECK1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_2]], %struct.S.2* [[TMP0]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[F2]], align 4 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.St* [[T]], i32 0, i32 0 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 @@ -1247,11 +1285,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.2*, align 8 +// CHECK1-NEXT: store %struct.S.2* [[THIS]], %struct.S.2** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.2*, %struct.S.2** [[THIS_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -1463,8 +1501,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK3-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 @@ -1473,141 +1510,164 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32 [[TMP4]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP2]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC1]] to i8* -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK3-NEXT: [[TMP12:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S]* [[TMP6]] to %struct.S* +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP8]], %struct.St* noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: store i32 [[TMP18]], i32* [[SIVAR_CASTED]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], [2 x i32]* [[VEC1]], i32 [[TMP17]], [2 x %struct.S]* [[S_ARR2]], %struct.S* [[VAR4]], i32 [[TMP19]]), !llvm.access.group !6 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP24]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP25]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP26]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[TMP28]], i32* [[TMP27]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP29]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store %struct.S* [[VAR]], %struct.S** [[TMP30]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[TMP32]], i32* [[TMP31]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK3-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK3-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN4]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP26]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP39]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done8: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done5: // CHECK3-NEXT: ret void // // @@ -1645,145 +1705,149 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC1]] to i8* -// CHECK3-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: [[TMP17:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK3-NEXT: [[TMP18:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP17]], i8* align 4 [[TMP18]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP19:%.*]] = bitcast [2 x %struct.S]* [[TMP10]] to %struct.S* +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP20]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP19]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP20]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP12]], %struct.St* noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP22]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP23]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC1]], i32 0, i32 [[TMP18]] -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 [[TMP19]] -// CHECK3-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* -// CHECK3-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR4]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false), !llvm.access.group !10 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP30]] +// CHECK3-NEXT: store i32 [[TMP29]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP31]] +// CHECK3-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* +// CHECK3-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i32 4, i1 false), !llvm.access.group !10 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP35]], [[TMP34]] +// CHECK3-NEXT: store i32 [[ADD5]], i32* [[SIVAR]], align 4, !llvm.access.group !10 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP36]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK3-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK3-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP29]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP41]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done11: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done8: // CHECK3-NEXT: ret void // // @@ -1791,34 +1855,34 @@ // CHECK3-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x i8*], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x i8*], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x i8*], align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK3-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i32 8, i1 false) -// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i32 1 -// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) -// CHECK3-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[TMP1]], %struct.S.0** [[TMP]], align 4 +// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i32 1 +// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK3-NEXT: store %struct.S.1* [[TEST]], %struct.S.1** [[VAR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[TMP1]], %struct.S.1** [[TMP]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_VAR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[T_VAR_CASTED]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32* // CHECK3-NEXT: store i32 [[TMP3]], i32* [[TMP8]], align 4 @@ -1836,19 +1900,19 @@ // CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 // CHECK3-NEXT: store i8* null, i8** [[TMP16]], align 4 // CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.0]** -// CHECK3-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP18]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.1]** +// CHECK3-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP18]], align 4 // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to [2 x %struct.S.0]** -// CHECK3-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to [2 x %struct.S.1]** +// CHECK3-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP20]], align 4 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 // CHECK3-NEXT: store i8* null, i8** [[TMP21]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.0** -// CHECK3-NEXT: store %struct.S.0* [[TMP5]], %struct.S.0** [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.1** +// CHECK3-NEXT: store %struct.S.1* [[TMP5]], %struct.S.1** [[TMP23]], align 4 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to %struct.S.0** -// CHECK3-NEXT: store %struct.S.0* [[TMP6]], %struct.S.0** [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to %struct.S.1** +// CHECK3-NEXT: store %struct.S.1* [[TMP6]], %struct.S.1** [[TMP25]], align 4 // CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 // CHECK3-NEXT: store i8* null, i8** [[TMP26]], align 4 // CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 @@ -1858,21 +1922,21 @@ // CHECK3-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 // CHECK3-NEXT: br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56(i32 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP4]]) #[[ATTR2]] +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56(i32 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.1]* [[S_ARR]], %struct.S.1* [[TMP4]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK3: arraydestroy.done2: -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK3-NEXT: ret i32 [[TMP32]] // @@ -1920,368 +1984,392 @@ // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK3-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 -// CHECK3-SAME: (i32 noundef [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32 noundef [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.1]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.1]*, align 4 +// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[S_ARR_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[VAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32 [[TMP4]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP5]]) +// CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[S_ARR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[TMP2]], %struct.S.1** [[TMP]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [2 x %struct.S.1]* [[TMP1]], [2 x %struct.S.1]** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[TMP8]], %struct.S.1** [[TMP7]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP7]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[TMP8]], %struct.S.1** [[TMP]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: [[TMP9:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK3-NEXT: [[TMP10:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 [[TMP10]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = bitcast [2 x %struct.S.1]* [[TMP6]] to %struct.S.1* +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN]], [[TMP12]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP11]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.St* noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: [[TMP13:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP13]], %struct.St* noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP19:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], [2 x i32]* [[VEC2]], i32 [[TMP18]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP19]]), !llvm.access.group !15 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP23]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP24]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP25]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP28]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP30:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP4]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: store %struct.S.1* [[TMP30]], %struct.S.1** [[TMP29]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !15 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK3-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK3-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN9]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP26]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done10: +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP37]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done7: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC1ERKS0_2St -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 4 -// CHECK3-NEXT: call void @_ZN1SIiEC2ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP0]], %struct.St* noundef [[T]]) +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[S]], %struct.S.1** [[S_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.S.1*, %struct.S.1** [[S_ADDR]], align 4 +// CHECK3-NEXT: call void @_ZN1SIiEC2ERKS0_2St(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP0]], %struct.St* noundef [[T]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP11]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[TMP12]], %struct.S.1** [[TMP]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK3-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP5]], i8* align 4 [[TMP6]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK3-NEXT: [[TMP16:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP15]], i8* align 4 [[TMP16]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = bitcast [2 x %struct.S.1]* [[TMP10]] to %struct.S.1* +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN]], [[TMP18]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP17]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: [[TMP9:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP9]], %struct.St* noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP18]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: [[TMP19:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP19]], %struct.St* noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP21]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP22]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP19]] -// CHECK3-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX9]] to i8* -// CHECK3-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false), !llvm.access.group !18 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP29]] +// CHECK3-NEXT: store i32 [[TMP28]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP30:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP4]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 [[TMP31]] +// CHECK3-NEXT: [[TMP32:%.*]] = bitcast %struct.S.1* [[ARRAYIDX6]] to i8* +// CHECK3-NEXT: [[TMP33:%.*]] = bitcast %struct.S.1* [[TMP30]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i32 4, i1 false), !llvm.access.group !18 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], 1 +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK3-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK3-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN11]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN8]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP29]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done12: +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP39]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done9: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load volatile i32, i32* @g, align 4 // CHECK3-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load volatile i32, i32* @g, align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -2290,16 +2378,16 @@ // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC2ERKS0_2St -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 4 -// CHECK3-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[S]], %struct.S.1** [[S_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.S.1*, %struct.S.1** [[S_ADDR]], align 4 +// CHECK3-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[TMP0]], i32 0, i32 0 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[F2]], align 4 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.St* [[T]], i32 0, i32 0 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 @@ -2309,11 +2397,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -3117,9 +3205,7 @@ // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK9-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 // CHECK9-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 @@ -3127,110 +3213,114 @@ // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK9-NEXT: store i32* [[CONV2]], i32** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load volatile i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* @g, align 4 +// CHECK9-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]]) +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK9-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[G_ADDR]] to i32* -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[G]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[G1]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], i32* [[SIVAR]], align 4 +// CHECK9-NEXT: store i32* [[G1]], i32** [[TMP]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] -// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[G_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP14:%.*]] = load volatile i32, i32* [[TMP13]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[G1_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[CONV7:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !4 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]), !llvm.access.group !4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK9-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK9-NEXT: store i64 [[TMP17]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP18]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP19]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[G]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: store i32 [[TMP21]], i32* [[TMP20]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* @g, align 4, !llvm.access.group !4 +// CHECK9-NEXT: store i32 [[TMP23]], i32* [[TMP22]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: store i32 [[TMP25]], i32* [[TMP24]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK9-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK9-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3239,99 +3329,105 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK9-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[G_ADDR]] to i32* -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], i32* [[G]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: store i32 [[TMP8]], i32* [[G1]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 8 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[SIVAR]], align 4 +// CHECK9-NEXT: store i32* [[G1]], i32** [[TMP]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP11]] to i32 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 +// CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 -// CHECK9-NEXT: store i32 1, i32* [[CONV]], align 4, !llvm.access.group !8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !8 -// CHECK9-NEXT: store volatile i32 1, i32* [[TMP10]], align 4, !llvm.access.group !8 -// CHECK9-NEXT: store i32 2, i32* [[CONV2]], align 4, !llvm.access.group !8 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8, !llvm.access.group !8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !8 -// CHECK9-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8, !llvm.access.group !8 -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8, !llvm.access.group !8 -// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group !8 +// CHECK9-NEXT: store i32 1, i32* [[G]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !8 +// CHECK9-NEXT: store volatile i32 1, i32* [[TMP21]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: store i32 2, i32* [[SIVAR]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[G]], i32** [[TMP22]], align 8, !llvm.access.group !8 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !8 +// CHECK9-NEXT: store i32* [[TMP24]], i32** [[TMP23]], align 8, !llvm.access.group !8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[TMP25]], align 8, !llvm.access.group !8 +// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group !8 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]]) +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK9-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp @@ -137,70 +137,83 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l43 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !11 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -209,12 +222,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -224,60 +236,64 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -288,75 +304,88 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l47 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !20 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !20 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !20 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !20 -// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !20 -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 +// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP14]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !20 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !20 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -365,12 +394,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -380,43 +408,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 // CHECK1-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !23 @@ -424,17 +456,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -508,70 +540,83 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l76 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !26 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !26 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !26 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -580,12 +625,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -595,43 +639,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !29 // CHECK1-NEXT: call void @_Z3fn4v(), !llvm.access.group !29 @@ -639,17 +687,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -660,75 +708,88 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l84 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !32 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !32 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !32 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !32 -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !32 +// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP14]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !32 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !32 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -737,12 +798,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -752,43 +812,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !35 // CHECK1-NEXT: call void @_Z3fn5v(), !llvm.access.group !35 @@ -796,17 +860,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -819,100 +883,114 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK1-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* -// CHECK1-NEXT: [[FROMBOOL3:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL3]], i8* [[CONV2]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i64 [[TMP2]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK1-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL2]], i8* [[TMP1]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK1-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !38 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !38 +// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !38 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !38 // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !38 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !38 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !38 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !38 -// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !38 -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 +// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !38 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !38 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -921,12 +999,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -936,43 +1013,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !41 // CHECK1-NEXT: call void @_Z3fn6v(), !llvm.access.group !41 @@ -980,17 +1061,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1062,70 +1143,83 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l57 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !44 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !44 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !44 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !44 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !44 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1134,12 +1228,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1149,43 +1242,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !47 // CHECK1-NEXT: call void @_Z3fn1v(), !llvm.access.group !47 @@ -1193,17 +1290,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1214,75 +1311,88 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l62 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..12 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.11*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !50 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !50 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !50 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !50 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !50 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !50 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !50 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !50 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !50 -// CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !50 -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !50 +// CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TMP14]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !50 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !50 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1291,12 +1401,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1306,43 +1415,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !53 // CHECK1-NEXT: call void @_Z3fn2v(), !llvm.access.group !53 @@ -1350,17 +1463,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1373,100 +1486,114 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 1 // CHECK1-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* -// CHECK1-NEXT: [[FROMBOOL3:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL3]], i8* [[CONV2]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP2]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK1-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL2]], i8* [[TMP1]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.13*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK1-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !56 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !56 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !56 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !56 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !56 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !56 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !56 +// CHECK1-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !56 +// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !56 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !56 // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !56 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !56 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !56 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !56 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !56 -// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !56 -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !56 +// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !56 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !56 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !56 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !56 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1475,12 +1602,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1490,43 +1616,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !59 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !59 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !59 // CHECK1-NEXT: call void @_Z3fn3v(), !llvm.access.group !59 @@ -1534,17 +1664,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1586,70 +1716,83 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l43 // CHECK3-SAME: () #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !11 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !11 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1658,12 +1801,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1673,60 +1815,64 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1737,75 +1883,88 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l47 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !20 +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !20 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !20 // CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !20 -// CHECK3-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !20 -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 +// CHECK3-NEXT: call void @.omp_outlined..3(i32* [[TMP14]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !20 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !20 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1814,12 +1973,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1829,43 +1987,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 // CHECK3-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !23 @@ -1873,17 +2035,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1957,70 +2119,83 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l76 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !26 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !26 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !26 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2029,12 +2204,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2044,43 +2218,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !29 // CHECK3-NEXT: call void @_Z3fn4v(), !llvm.access.group !29 @@ -2088,17 +2266,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2109,75 +2287,88 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l84 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @.omp_outlined..7(i32* [[TMP14]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2186,12 +2377,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2201,43 +2391,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK3-NEXT: call void @_Z3fn5v() @@ -2245,17 +2439,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2268,156 +2462,173 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK3-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1 -// CHECK3-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* -// CHECK3-NEXT: [[FROMBOOL3:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL3]], i8* [[CONV2]], align 1 -// CHECK3-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i64 [[TMP2]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK3-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL2]], i8* [[TMP1]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED11:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_11:%.*]] = alloca [[STRUCT_ANON_8]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK3-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE6:%.*]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP8]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE7:%.*]] // CHECK3: omp_if.then: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 -// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 -// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !35 -// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 -// CHECK3-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* -// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !35 -// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !35 -// CHECK3-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !35 -// CHECK3-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] -// CHECK3: omp_if.then5: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !35 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK3-NEXT: store i64 [[TMP12]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 +// CHECK3-NEXT: store i64 [[TMP14]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP15]], align 8, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP16]], align 8, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP18:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !35 +// CHECK3-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK3-NEXT: [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL3]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL4]], i8* [[TMP17]], align 8, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP19:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !35 +// CHECK3-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP19]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL5]], label [[OMP_IF_THEN6:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3: omp_if.then6: +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !35 // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !35 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !35 +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !35 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !35 // CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !35 -// CHECK3-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !35 -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !35 +// CHECK3-NEXT: call void @.omp_outlined..9(i32* [[TMP20]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !35 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !35 // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END22:%.*]] -// CHECK3: omp_if.else6: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] -// CHECK3: omp.inner.for.cond7: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END21:%.*]] -// CHECK3: omp.inner.for.body9: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK3-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK3-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP24]] to i1 -// CHECK3-NEXT: [[CONV12:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED11]] to i8* -// CHECK3-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL10]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL13]], i8* [[CONV12]], align 1 -// CHECK3-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED11]], align 8 -// CHECK3-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK3-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP26]] to i1 +// CHECK3: omp_if.else7: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] +// CHECK3: omp.inner.for.cond8: +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END21:%.*]] +// CHECK3: omp.inner.for.body10: +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK3-NEXT: store i64 [[TMP26]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK3-NEXT: store i64 [[TMP28]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 0 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP29]], align 8 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 1 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP30]], align 8 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP32:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL12:%.*]] = trunc i8 [[TMP32]] to i1 +// CHECK3-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL12]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL13]], i8* [[TMP31]], align 8 +// CHECK3-NEXT: [[TMP33:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP33]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]] // CHECK3: omp_if.then15: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_11]]) // CHECK3-NEXT: br label [[OMP_IF_END18:%.*]] // CHECK3: omp_if.else16: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..10(i32* [[TMP27]], i32* [[DOTBOUND_ZERO_ADDR17]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @.omp_outlined..10(i32* [[TMP34]], i32* [[DOTBOUND_ZERO_ADDR17]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_11]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]) // CHECK3-NEXT: br label [[OMP_IF_END18]] // CHECK3: omp_if.end18: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC19:%.*]] // CHECK3: omp.inner.for.inc19: -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK3-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP38:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK3: omp.inner.for.end21: // CHECK3-NEXT: br label [[OMP_IF_END22]] // CHECK3: omp_if.end22: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK3-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK3-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2426,13 +2637,12 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2442,49 +2652,56 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK3-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 8 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK3-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK3-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK3-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 // CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group !39 @@ -2492,38 +2709,38 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP14]], 99 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK3: cond.true6: // CHECK3-NEXT: br label [[COND_END8:%.*]] // CHECK3: cond.false7: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END8]] // CHECK3: cond.end8: -// CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP15]], [[COND_FALSE7]] ] +// CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] // CHECK3-NEXT: store i32 [[COND9]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] // CHECK3: omp.inner.for.cond10: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK3-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] // CHECK3: omp.inner.for.body12: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] // CHECK3-NEXT: store i32 [[ADD14]], i32* [[I]], align 4 // CHECK3-NEXT: call void @_Z3fn6v() @@ -2531,8 +2748,8 @@ // CHECK3: omp.body.continue15: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] // CHECK3: omp.inner.for.inc16: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK3-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK3: omp.inner.for.end18: @@ -2540,12 +2757,12 @@ // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP29]]) +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK3-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2554,13 +2771,12 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2570,49 +2786,56 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK3-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 8 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK3-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK3-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK3-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 // CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 @@ -2620,38 +2843,38 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP14]], 99 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK3: cond.true6: // CHECK3-NEXT: br label [[COND_END8:%.*]] // CHECK3: cond.false7: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END8]] // CHECK3: cond.end8: -// CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP15]], [[COND_FALSE7]] ] +// CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] // CHECK3-NEXT: store i32 [[COND9]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] // CHECK3: omp.inner.for.cond10: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK3-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] // CHECK3: omp.inner.for.body12: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] // CHECK3-NEXT: store i32 [[ADD14]], i32* [[I]], align 4 // CHECK3-NEXT: call void @_Z3fn6v() @@ -2659,8 +2882,8 @@ // CHECK3: omp.body.continue15: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] // CHECK3: omp.inner.for.inc16: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK3-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK3: omp.inner.for.end18: @@ -2668,12 +2891,12 @@ // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP29]]) +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK3-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2745,70 +2968,83 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l57 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47 -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !47 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !47 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !47 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !47 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2817,12 +3053,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2832,43 +3067,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !50 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !50 // CHECK3-NEXT: call void @_Z3fn1v(), !llvm.access.group !50 @@ -2876,17 +3115,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2897,75 +3136,88 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l62 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..13 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.11*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..14(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @.omp_outlined..14(i32* [[TMP14]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2974,12 +3226,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2989,43 +3240,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK3-NEXT: call void @_Z3fn2v() @@ -3033,17 +3288,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3056,100 +3311,114 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 1 // CHECK3-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1 -// CHECK3-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* -// CHECK3-NEXT: [[FROMBOOL3:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL3]], i8* [[CONV2]], align 1 -// CHECK3-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP2]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK3-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL2]], i8* [[TMP1]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.13*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK3-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !55 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !55 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !55 -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !55 -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !55 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !55 +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !55 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !55 +// CHECK3-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK3-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !55 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !55 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !55 +// CHECK3-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !55 +// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !55 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !55 // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !55 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !55 +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !55 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !55 // CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !55 -// CHECK3-NEXT: call void @.omp_outlined..16(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !55 -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !55 +// CHECK3-NEXT: call void @.omp_outlined..16(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !55 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !55 // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !55 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !55 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3158,12 +3427,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3173,43 +3441,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !58 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !58 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !58 // CHECK3-NEXT: call void @_Z3fn3v(), !llvm.access.group !58 @@ -3217,17 +3489,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3860,70 +4132,83 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l43 // CHECK9-SAME: () #[[ATTR1:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !15 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !15 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3932,12 +4217,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3947,60 +4231,64 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4011,75 +4299,88 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l47 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !24 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !24 // CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !24 -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 +// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP14]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !24 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !24 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK9-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4088,12 +4389,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4103,43 +4403,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 // CHECK9-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !27 @@ -4147,17 +4451,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4231,70 +4535,83 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l76 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !30 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !30 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4303,12 +4620,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4318,43 +4634,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 // CHECK9-NEXT: call void @_Z3fn4v(), !llvm.access.group !33 @@ -4362,17 +4682,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4383,75 +4703,88 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l84 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !36 +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !36 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !36 // CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !36 -// CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !36 -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 +// CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP14]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !36 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !36 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK9-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4460,12 +4793,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4475,43 +4807,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 // CHECK9-NEXT: call void @_Z3fn5v(), !llvm.access.group !39 @@ -4519,17 +4855,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4542,100 +4878,114 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK9-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK9-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK9-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK9-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1 -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* -// CHECK9-NEXT: [[FROMBOOL3:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK9-NEXT: store i8 [[FROMBOOL3]], i8* [[CONV2]], align 1 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i64 [[TMP2]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK9-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK9-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 +// CHECK9-NEXT: store i8 [[FROMBOOL2]], i8* [[TMP1]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK9-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK9-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !42 -// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !42 +// CHECK9-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK9-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !42 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !42 // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !42 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !42 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !42 // CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !42 -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !42 +// CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !42 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !42 // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK9-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4644,12 +4994,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4659,43 +5008,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 // CHECK9-NEXT: call void @_Z3fn6v(), !llvm.access.group !45 @@ -4703,17 +5056,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4785,70 +5138,83 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l57 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !48 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !48 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !48 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !48 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !48 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !48 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !48 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !48 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !48 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !48 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4857,12 +5223,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4872,43 +5237,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !51 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !51 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !51 // CHECK9-NEXT: call void @_Z3fn1v(), !llvm.access.group !51 @@ -4916,17 +5285,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4937,75 +5306,88 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l62 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..12 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.11*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !54 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !54 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !54 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !54 +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !54 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 // CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !54 -// CHECK9-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 +// CHECK9-NEXT: call void @.omp_outlined..13(i32* [[TMP14]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !54 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !54 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK9-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5014,12 +5396,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5029,43 +5410,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !57 // CHECK9-NEXT: call void @_Z3fn2v(), !llvm.access.group !57 @@ -5073,17 +5458,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5096,100 +5481,114 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 1 // CHECK9-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK9-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK9-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK9-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1 -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* -// CHECK9-NEXT: [[FROMBOOL3:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK9-NEXT: store i8 [[FROMBOOL3]], i8* [[CONV2]], align 1 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP2]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK9-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK9-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 +// CHECK9-NEXT: store i8 [[FROMBOOL2]], i8* [[TMP1]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.13*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK9-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK9-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !60 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !60 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !60 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !60 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !60 -// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !60 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !60 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !60 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !60 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !60 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !60 +// CHECK9-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !60 +// CHECK9-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK9-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !60 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !60 // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !60 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !60 +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !60 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !60 // CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !60 -// CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !60 -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !60 +// CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !60 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !60 // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !60 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !60 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP61:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK9-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5198,12 +5597,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5213,43 +5611,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !63 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !63 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !63 // CHECK9-NEXT: call void @_Z3fn3v(), !llvm.access.group !63 @@ -5257,17 +5659,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP64:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5309,70 +5711,83 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l43 // CHECK11-SAME: () #[[ATTR1:[0-9]+]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !15 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !15 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5381,12 +5796,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5396,60 +5810,64 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5460,75 +5878,88 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l47 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !24 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !24 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !24 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !24 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !24 +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !24 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !24 // CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !24 -// CHECK11-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !24 -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 +// CHECK11-NEXT: call void @.omp_outlined..3(i32* [[TMP14]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !24 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !24 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK11-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5537,12 +5968,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5552,43 +5982,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 // CHECK11-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !27 @@ -5596,17 +6030,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5680,70 +6114,83 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l76 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !30 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !30 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5752,12 +6199,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5767,43 +6213,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 // CHECK11-NEXT: call void @_Z3fn4v(), !llvm.access.group !33 @@ -5811,17 +6261,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5832,75 +6282,88 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l84 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @.omp_outlined..7(i32* [[TMP14]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK11-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5909,12 +6372,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5924,43 +6386,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK11-NEXT: call void @_Z3fn5v() @@ -5968,17 +6434,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5991,156 +6457,173 @@ // CHECK11-NEXT: entry: // CHECK11-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK11-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK11-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK11-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1 -// CHECK11-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* -// CHECK11-NEXT: [[FROMBOOL3:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL3]], i8* [[CONV2]], align 1 -// CHECK11-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i64 [[TMP2]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK11-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL2]], i8* [[TMP1]], align 1 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED11:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_11:%.*]] = alloca [[STRUCT_ANON_8]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK11-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE6:%.*]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP8]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE7:%.*]] // CHECK11: omp_if.then: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !39 -// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 -// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !39 -// CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 -// CHECK11-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* -// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !39 -// CHECK11-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !39 -// CHECK11-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !39 -// CHECK11-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] -// CHECK11: omp_if.then5: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !39 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11: omp.inner.for.body: +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK11-NEXT: store i64 [[TMP12]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 +// CHECK11-NEXT: store i64 [[TMP14]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP15]], align 8, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP16]], align 8, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP18:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !39 +// CHECK11-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK11-NEXT: [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL3]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL4]], i8* [[TMP17]], align 8, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP19:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !39 +// CHECK11-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP19]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL5]], label [[OMP_IF_THEN6:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK11: omp_if.then6: +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !39 // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !39 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !39 +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !39 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !39 // CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !39 -// CHECK11-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !39 -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !39 +// CHECK11-NEXT: call void @.omp_outlined..9(i32* [[TMP20]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !39 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !39 // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !39 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END22:%.*]] -// CHECK11: omp_if.else6: -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] -// CHECK11: omp.inner.for.cond7: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END21:%.*]] -// CHECK11: omp.inner.for.body9: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK11-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK11-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP24]] to i1 -// CHECK11-NEXT: [[CONV12:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED11]] to i8* -// CHECK11-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL10]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL13]], i8* [[CONV12]], align 1 -// CHECK11-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED11]], align 8 -// CHECK11-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK11-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP26]] to i1 +// CHECK11: omp_if.else7: +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] +// CHECK11: omp.inner.for.cond8: +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END21:%.*]] +// CHECK11: omp.inner.for.body10: +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK11-NEXT: store i64 [[TMP26]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK11-NEXT: store i64 [[TMP28]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 0 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP29]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 1 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP30]], align 8 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP32:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL12:%.*]] = trunc i8 [[TMP32]] to i1 +// CHECK11-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL12]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL13]], i8* [[TMP31]], align 8 +// CHECK11-NEXT: [[TMP33:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP33]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]] // CHECK11: omp_if.then15: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_11]]) // CHECK11-NEXT: br label [[OMP_IF_END18:%.*]] // CHECK11: omp_if.else16: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..10(i32* [[TMP27]], i32* [[DOTBOUND_ZERO_ADDR17]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @.omp_outlined..10(i32* [[TMP34]], i32* [[DOTBOUND_ZERO_ADDR17]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_11]]) #[[ATTR2]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]) // CHECK11-NEXT: br label [[OMP_IF_END18]] // CHECK11: omp_if.end18: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC19:%.*]] // CHECK11: omp.inner.for.inc19: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP42:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK11: omp.inner.for.end21: // CHECK11-NEXT: br label [[OMP_IF_END22]] // CHECK11: omp_if.end22: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK11-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6149,13 +6632,12 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6165,49 +6647,56 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK11-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 8 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK11-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK11-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK11-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 // CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 @@ -6215,38 +6704,38 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP14]], 99 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK11: cond.true6: // CHECK11-NEXT: br label [[COND_END8:%.*]] // CHECK11: cond.false7: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END8]] // CHECK11: cond.end8: -// CHECK11-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP15]], [[COND_FALSE7]] ] +// CHECK11-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] // CHECK11-NEXT: store i32 [[COND9]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] // CHECK11: omp.inner.for.cond10: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] // CHECK11: omp.inner.for.body12: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK11-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] // CHECK11-NEXT: store i32 [[ADD14]], i32* [[I]], align 4 // CHECK11-NEXT: call void @_Z3fn6v() @@ -6254,8 +6743,8 @@ // CHECK11: omp.body.continue15: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] // CHECK11: omp.inner.for.inc16: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK11-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK11: omp.inner.for.end18: @@ -6263,12 +6752,12 @@ // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP29]]) +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6277,13 +6766,12 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6293,49 +6781,56 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK11-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 8 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK11-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK11-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK11-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !47 // CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group !47 @@ -6343,38 +6838,38 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP14]], 99 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK11: cond.true6: // CHECK11-NEXT: br label [[COND_END8:%.*]] // CHECK11: cond.false7: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END8]] // CHECK11: cond.end8: -// CHECK11-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP15]], [[COND_FALSE7]] ] +// CHECK11-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] // CHECK11-NEXT: store i32 [[COND9]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] // CHECK11: omp.inner.for.cond10: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] // CHECK11: omp.inner.for.body12: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK11-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] // CHECK11-NEXT: store i32 [[ADD14]], i32* [[I]], align 4 // CHECK11-NEXT: call void @_Z3fn6v() @@ -6382,8 +6877,8 @@ // CHECK11: omp.body.continue15: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] // CHECK11: omp.inner.for.inc16: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK11-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK11: omp.inner.for.end18: @@ -6391,12 +6886,12 @@ // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP29]]) +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6468,70 +6963,83 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l57 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !51 -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !51 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !51 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !51 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !51 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6540,12 +7048,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6555,43 +7062,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !54 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !54 // CHECK11-NEXT: call void @_Z3fn1v(), !llvm.access.group !54 @@ -6599,17 +7110,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6620,75 +7131,88 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l62 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..13 to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.11*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8 +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..14(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @.omp_outlined..14(i32* [[TMP14]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK11-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6697,12 +7221,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6712,43 +7235,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK11-NEXT: call void @_Z3fn2v() @@ -6756,17 +7283,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6779,100 +7306,114 @@ // CHECK11-NEXT: entry: // CHECK11-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 1 // CHECK11-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK11-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK11-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1 -// CHECK11-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* -// CHECK11-NEXT: [[FROMBOOL3:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL3]], i8* [[CONV2]], align 1 -// CHECK11-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP2]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK11-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL2]], i8* [[TMP1]], align 1 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.13*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK11-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !59 -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !59 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !59 +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !59 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 +// CHECK11-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK11-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !59 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !59 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !59 +// CHECK11-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !59 +// CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !59 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !59 // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !59 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !59 +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !59 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !59 // CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !59 -// CHECK11-NEXT: call void @.omp_outlined..16(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !59 -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !59 +// CHECK11-NEXT: call void @.omp_outlined..16(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group !59 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group !59 // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !59 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !59 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK11-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK11-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6881,12 +7422,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6896,43 +7436,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !62 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !62 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !62 // CHECK11-NEXT: call void @_Z3fn3v(), !llvm.access.group !62 @@ -6940,17 +7484,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp @@ -210,6 +210,7 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], i64* [[SVAR_ADDR]], align 8 @@ -219,20 +220,25 @@ // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SFVAR_ADDR]] to float* // CHECK1-NEXT: store double* [[CONV1]], double** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, double*, double*, i32*, float*)* @.omp_outlined. to void (i32*, i32*, ...)*), double* [[CONV]], double* [[TMP0]], i32* [[CONV2]], float* [[CONV3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store double* [[CONV]], double** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[TMP2]], double** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[CONV2]], i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[CONV3]], float** [[TMP4]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[G:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -240,105 +246,119 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP4:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store double* [[G]], double** [[G_ADDR]], align 8 -// CHECK1-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store float* [[SFVAR]], float** [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float*, float** [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store double* [[TMP1]], double** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 8 +// CHECK1-NEXT: store double* [[TMP4]], double** [[TMP]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: store double* [[G13]], double** [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[G1]], double** [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load double*, double** [[_TMP4]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, double*, double*, i32*, float*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP13]], i64 [[TMP15]], double* [[G2]], double* [[TMP16]], i32* [[SVAR5]], float* [[SFVAR6]]), !llvm.access.group !4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// CHECK1-NEXT: store i64 [[TMP18]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK1-NEXT: store i64 [[TMP20]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP21]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP22]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store double* [[G]], double** [[TMP23]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP25:%.*]] = load double*, double** [[_TMP2]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: store double* [[TMP25]], double** [[TMP24]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store i32* [[SVAR]], i32** [[TMP26]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store float* [[SFVAR]], float** [[TMP27]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK1-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK1-NEXT: br i1 [[TMP33]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP23:%.*]] = load double, double* [[G2]], align 8 -// CHECK1-NEXT: store volatile double [[TMP23]], double* [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load double*, double** [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load double, double* [[TMP24]], align 8 -// CHECK1-NEXT: store volatile double [[TMP25]], double* [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[SVAR5]], align 4 -// CHECK1-NEXT: store i32 [[TMP26]], i32* [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load float, float* [[SFVAR6]], align 4 -// CHECK1-NEXT: store float [[TMP27]], float* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load double, double* [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP34]], double* [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load double*, double** [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load double, double* [[TMP35]], align 8 +// CHECK1-NEXT: store volatile double [[TMP36]], double* [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP37]], i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load float, float* [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP38]], float* [[TMP8]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[G:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -346,112 +366,116 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store double* [[G]], double** [[G_ADDR]], align 8 -// CHECK1-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store float* [[SFVAR]], float** [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float*, float** [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store double* [[TMP1]], double** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double*, double** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP11]], align 8 +// CHECK1-NEXT: store double* [[TMP8]], double** [[TMP]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: store double* [[G14]], double** [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[G1]], double** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: store double 1.000000e+00, double* [[G3]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP15:%.*]] = load double*, double** [[_TMP5]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP15]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: store i32 3, i32* [[SVAR6]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: store float 4.000000e+00, float* [[SFVAR7]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double* [[G3]], double** [[TMP16]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP18:%.*]] = load double*, double** [[_TMP5]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: store double* [[TMP18]], double** [[TMP17]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store i32* [[SVAR6]], i32** [[TMP19]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store float* [[SFVAR7]], float** [[TMP20]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !8 +// CHECK1-NEXT: store double 1.000000e+00, double* [[G]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP24:%.*]] = load double*, double** [[_TMP3]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP24]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: store i32 3, i32* [[SVAR]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: store float 4.000000e+00, float* [[SFVAR]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store double* [[G]], double** [[TMP25]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load double*, double** [[_TMP3]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: store double* [[TMP27]], double** [[TMP26]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[SVAR]], i32** [[TMP28]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[SFVAR]], float** [[TMP29]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK1-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK1-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP26:%.*]] = load double, double* [[G3]], align 8 -// CHECK1-NEXT: store volatile double [[TMP26]], double* [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load double*, double** [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load double, double* [[TMP27]], align 8 -// CHECK1-NEXT: store volatile double [[TMP28]], double* [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[SVAR6]], align 4 -// CHECK1-NEXT: store i32 [[TMP29]], i32* [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load float, float* [[SFVAR7]], align 4 -// CHECK1-NEXT: store float [[TMP30]], float* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load double, double* [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP35]], double* [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load double*, double** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load double, double* [[TMP36]], align 8 +// CHECK1-NEXT: store volatile double [[TMP37]], double* [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP38]], i32* [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load float, float* [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP39]], float* [[TMP12]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -493,6 +517,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 // CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], i32* [[SVAR_ADDR]], align 4 @@ -507,20 +532,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load double, double* [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], double* [[G13]], align 8 // CHECK3-NEXT: store double* [[G13]], double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, double*, double*, i32*, float*)* @.omp_outlined. to void (i32*, i32*, ...)*), double* [[G2]], double* [[TMP5]], i32* [[SVAR_ADDR]], float* [[CONV]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G2]], double** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load double*, double** [[_TMP4]], align 4 +// CHECK3-NEXT: store double* [[TMP7]], double** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SVAR_ADDR]], i32** [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[CONV]], float** [[TMP9]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca float*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -528,103 +558,117 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 -// CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store float* [[SFVAR]], float** [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load float*, float** [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store double* [[TMP1]], double** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 4 +// CHECK3-NEXT: store double* [[TMP4]], double** [[TMP]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP]], align 4 -// CHECK3-NEXT: store double* [[G13]], double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load double*, double** [[TMP]], align 4 +// CHECK3-NEXT: store double* [[G1]], double** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP14:%.*]] = load double*, double** [[_TMP4]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, double*, double*, i32*, float*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP12]], i32 [[TMP13]], double* [[G2]], double* [[TMP14]], i32* [[SVAR5]], float* [[SFVAR6]]), !llvm.access.group !5 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP19]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP20]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store double* [[G]], double** [[TMP21]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP23:%.*]] = load double*, double** [[_TMP2]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store double* [[TMP23]], double** [[TMP22]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store i32* [[SVAR]], i32** [[TMP24]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store float* [[SFVAR]], float** [[TMP25]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !5 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK3-NEXT: br i1 [[TMP31]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP21:%.*]] = load double, double* [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP21]], double* [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP22:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load double, double* [[TMP22]], align 4 -// CHECK3-NEXT: store volatile double [[TMP23]], double* [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP24]], i32* [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load float, float* [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP25]], float* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load double, double* [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP32]], double* [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP33:%.*]] = load double*, double** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load double, double* [[TMP33]], align 4 +// CHECK3-NEXT: store volatile double [[TMP34]], double* [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP35]], i32* [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load float, float* [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP36]], float* [[TMP8]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca float*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -632,110 +676,114 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 -// CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store float* [[SFVAR]], float** [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load float*, float** [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store double* [[TMP1]], double** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load double*, double** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double*, double** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP11]], align 4 +// CHECK3-NEXT: store double* [[TMP8]], double** [[TMP]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load double*, double** [[TMP]], align 4 -// CHECK3-NEXT: store double* [[G13]], double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load double*, double** [[TMP]], align 4 +// CHECK3-NEXT: store double* [[G1]], double** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: store double 1.000000e+00, double* [[G2]], align 8, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP15:%.*]] = load double*, double** [[_TMP4]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP15]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: store i32 3, i32* [[SVAR5]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: store float 4.000000e+00, float* [[SFVAR6]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double* [[G2]], double** [[TMP16]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP18:%.*]] = load double*, double** [[_TMP4]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: store double* [[TMP18]], double** [[TMP17]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store i32* [[SVAR5]], i32** [[TMP19]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store float* [[SFVAR6]], float** [[TMP20]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !9 +// CHECK3-NEXT: store double 1.000000e+00, double* [[G]], align 8, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP24:%.*]] = load double*, double** [[_TMP2]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP24]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store i32 3, i32* [[SVAR]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store float 4.000000e+00, float* [[SFVAR]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G]], double** [[TMP25]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load double*, double** [[_TMP2]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store double* [[TMP27]], double** [[TMP26]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SVAR]], i32** [[TMP28]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[SFVAR]], float** [[TMP29]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK3-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK3-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK3-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP26:%.*]] = load double, double* [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP26]], double* [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP27:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load double, double* [[TMP27]], align 4 -// CHECK3-NEXT: store volatile double [[TMP28]], double* [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP29]], i32* [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load float, float* [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP30]], float* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load double, double* [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP35]], double* [[TMP6]], align 8 +// CHECK3-NEXT: [[TMP36:%.*]] = load double*, double** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load double, double* [[TMP36]], align 4 +// CHECK3-NEXT: store volatile double [[TMP37]], double* [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP38]], i32* [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = load float, float* [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP39]], float* [[TMP12]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -926,6 +974,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 @@ -937,21 +986,27 @@ // CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK9-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32* [[CONV]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP3]], i32* [[CONV1]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S* [[TMP7]], %struct.S** [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP8]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -959,31 +1014,36 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP2:%.*]] = alloca %struct.S*, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK9-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -993,119 +1053,129 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK9-NEXT: store %struct.S* [[VAR5]], %struct.S** [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i32*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP14]], i64 [[TMP16]], [2 x i32]* [[VEC3]], i32* [[T_VAR2]], [2 x %struct.S]* [[S_ARR4]], %struct.S* [[TMP17]], i32* [[SVAR7]]), !llvm.access.group !5 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK9-NEXT: store i64 [[TMP20]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK9-NEXT: store i64 [[TMP22]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP23]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP24]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP25]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[TMP26]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP27]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP29:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: store %struct.S* [[TMP29]], %struct.S** [[TMP28]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: store i32* [[SVAR]], i32** [[TMP30]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !5 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK9-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK9-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK9-NEXT: br i1 [[TMP38]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP26]], i32* [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK9-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP27]], i8* align 4 [[TMP28]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP29:%.*]] = bitcast [2 x %struct.S]* [[S_ARR4]] to %struct.S* -// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN9]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN9]], [[TMP30]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP39]], i32* [[TMP4]], align 4 +// CHECK9-NEXT: [[TMP40:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK9-NEXT: [[TMP41:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP40]], i8* align 4 [[TMP41]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP42:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK9-NEXT: [[TMP43:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN4]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN4]], [[TMP43]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP29]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[TMP31:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK9-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP31]], i8* align 4 [[TMP32]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP42]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[TMP44:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK9-NEXT: [[TMP45:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP44]], i8* align 4 [[TMP45]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP30]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done10: -// CHECK9-NEXT: [[TMP33:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[TMP5]] to i8* -// CHECK9-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[TMP33]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[SVAR7]], align 4 -// CHECK9-NEXT: store i32 [[TMP36]], i32* [[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP43]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done5: +// CHECK9-NEXT: [[TMP46:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP47:%.*]] = bitcast %struct.S* [[TMP11]] to i8* +// CHECK9-NEXT: [[TMP48:%.*]] = bitcast %struct.S* [[TMP46]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP47]], i8* align 4 [[TMP48]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP49:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP49]], i32* [[TMP10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN11]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP37]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP50]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done12: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done7: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1113,39 +1183,43 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca %struct.S*, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32*, i32** [[TMP13]], align 8 +// CHECK9-NEXT: store %struct.S* [[TMP12]], %struct.S** [[TMP]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -1155,115 +1229,115 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store %struct.S* [[VAR6]], %struct.S** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP18:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK9-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX11]] to i8* -// CHECK9-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[TMP18]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false), !llvm.access.group !9 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP26]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP28:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: [[TMP30:%.*]] = bitcast %struct.S* [[ARRAYIDX6]] to i8* +// CHECK9-NEXT: [[TMP31:%.*]] = bitcast %struct.S* [[TMP28]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i64 4, i1 false), !llvm.access.group !9 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK9-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK9-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK9-NEXT: br i1 [[TMP38]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP29]], i32* [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK9-NEXT: [[TMP31:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP32:%.*]] = bitcast [2 x %struct.S]* [[S_ARR5]] to %struct.S* -// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN13]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN13]], [[TMP33]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP39]], i32* [[TMP8]], align 4 +// CHECK9-NEXT: [[TMP40:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK9-NEXT: [[TMP41:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP40]], i8* align 4 [[TMP41]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP10]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP42:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK9-NEXT: [[TMP43:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN8]], [[TMP43]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP32]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN13]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK9-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP42]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[TMP44:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK9-NEXT: [[TMP45:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP44]], i8* align 4 [[TMP45]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP33]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done14: -// CHECK9-NEXT: [[TMP36:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP37:%.*]] = bitcast %struct.S* [[TMP7]] to i8* -// CHECK9-NEXT: [[TMP38:%.*]] = bitcast %struct.S* [[TMP36]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP37]], i8* align 4 [[TMP38]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK9-NEXT: store i32 [[TMP39]], i32* [[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP43]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP46:%.*]] = load %struct.S*, %struct.S** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP47:%.*]] = bitcast %struct.S* [[TMP17]] to i8* +// CHECK9-NEXT: [[TMP48:%.*]] = bitcast %struct.S* [[TMP46]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP47]], i8* align 4 [[TMP48]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP49:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP49]], i32* [[TMP14]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN15:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN15]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP40]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP50]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN15]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE16:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done16: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -1281,35 +1355,35 @@ // CHECK9-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x i8*], align 8 // CHECK9-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x i8*], align 8 // CHECK9-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x i8*], align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) -// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) -// CHECK9-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[TMP1]], %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) +// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) +// CHECK9-NEXT: store %struct.S.1* [[TEST]], %struct.S.1** [[VAR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[TMP1]], %struct.S.1** [[TMP]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_VAR]], align 4 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64* // CHECK9-NEXT: store i64 [[TMP3]], i64* [[TMP8]], align 8 @@ -1327,19 +1401,19 @@ // CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 // CHECK9-NEXT: store i8* null, i8** [[TMP16]], align 8 // CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK9-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.0]** -// CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP18]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.1]** +// CHECK9-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP18]], align 8 // CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK9-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to [2 x %struct.S.0]** -// CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP20]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to [2 x %struct.S.1]** +// CHECK9-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP20]], align 8 // CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 // CHECK9-NEXT: store i8* null, i8** [[TMP21]], align 8 // CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK9-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.0** -// CHECK9-NEXT: store %struct.S.0* [[TMP5]], %struct.S.0** [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.1** +// CHECK9-NEXT: store %struct.S.1* [[TMP5]], %struct.S.1** [[TMP23]], align 8 // CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK9-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to %struct.S.0** -// CHECK9-NEXT: store %struct.S.0* [[TMP6]], %struct.S.0** [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to %struct.S.1** +// CHECK9-NEXT: store %struct.S.1* [[TMP6]], %struct.S.1** [[TMP25]], align 8 // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 // CHECK9-NEXT: store i8* null, i8** [[TMP26]], align 8 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 @@ -1349,21 +1423,21 @@ // CHECK9-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 // CHECK9-NEXT: br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: -// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49(i64 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP4]]) #[[ATTR4]] +// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49(i64 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.1]* [[S_ARR]], %struct.S.1* [[TMP4]]) #[[ATTR4]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: // CHECK9-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK9: arraydestroy.done2: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK9-NEXT: ret i32 [[TMP32]] // @@ -1403,404 +1477,428 @@ // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK9-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) +// CHECK9-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49 -// CHECK9-SAME: (i64 noundef [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i64 noundef [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.1]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.1]*, align 8 +// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 +// CHECK9-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[S_ARR_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[VAR_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32* [[CONV]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP3]]) +// CHECK9-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[S_ARR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[TMP2]], %struct.S.1** [[TMP]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [2 x %struct.S.1]* [[TMP1]], [2 x %struct.S.1]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[TMP7]], %struct.S.1** [[TMP6]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK9-NEXT: [[_TMP2:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP7]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[TMP8]], %struct.S.1** [[TMP]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: -// CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK9-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP9:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK9-NEXT: [[TMP16:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 8, !llvm.access.group !14 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i32*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP13]], i64 [[TMP15]], [2 x i32]* [[VEC3]], i32* [[T_VAR2]], [2 x %struct.S.0]* [[S_ARR4]], %struct.S.0* [[TMP16]]), !llvm.access.group !14 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// CHECK9-NEXT: store i64 [[TMP18]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK9-NEXT: store i64 [[TMP20]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP21]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP22]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP23]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[TMP24]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP25]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP27:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP2]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: store %struct.S.1* [[TMP27]], %struct.S.1** [[TMP26]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !14 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP31]]) +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK9-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK9-NEXT: br i1 [[TMP35]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP25]], i32* [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK9-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP28:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR4]] to %struct.S.0* -// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN8]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN8]], [[TMP29]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP36]], i32* [[TMP4]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK9-NEXT: [[TMP38:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP37]], i8* align 4 [[TMP38]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP39:%.*]] = bitcast [2 x %struct.S.1]* [[S_ARR]] to %struct.S.1* +// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN4]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN4]], [[TMP40]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP28]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[TMP30:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK9-NEXT: [[TMP31:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i64 4, i1 false) -// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP29]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done9: -// CHECK9-NEXT: [[TMP32:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP33:%.*]] = bitcast %struct.S.0* [[TMP4]] to i8* -// CHECK9-NEXT: [[TMP34:%.*]] = bitcast %struct.S.0* [[TMP32]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP39]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[TMP41:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK9-NEXT: [[TMP42:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP41]], i8* align 4 [[TMP42]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP40]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done5: +// CHECK9-NEXT: [[TMP43:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP44:%.*]] = bitcast %struct.S.1* [[TMP9]] to i8* +// CHECK9-NEXT: [[TMP45:%.*]] = bitcast %struct.S.1* [[TMP43]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP44]], i8* align 4 [[TMP45]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN10]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN6]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP35]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done11: +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP46]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done7: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP11]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[TMP12]], %struct.S.1** [[TMP]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: -// CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP6:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store %struct.S.0* [[VAR6]], %struct.S.0** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP17:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i64 0, i64 [[IDXPROM9]] -// CHECK9-NEXT: [[TMP19:%.*]] = bitcast %struct.S.0* [[ARRAYIDX10]] to i8* -// CHECK9-NEXT: [[TMP20:%.*]] = bitcast %struct.S.0* [[TMP17]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i64 4, i1 false), !llvm.access.group !17 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP24]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP26:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP3]], align 8, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: [[TMP28:%.*]] = bitcast %struct.S.1* [[ARRAYIDX6]] to i8* +// CHECK9-NEXT: [[TMP29:%.*]] = bitcast %struct.S.1* [[TMP26]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i64 4, i1 false), !llvm.access.group !17 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK9-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK9-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK9-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK9-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP28]], i32* [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK9-NEXT: [[TMP30:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP29]], i8* align 4 [[TMP30]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP31:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR5]] to %struct.S.0* -// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN12]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN12]], [[TMP32]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP37]], i32* [[TMP8]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK9-NEXT: [[TMP39:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP10]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP40:%.*]] = bitcast [2 x %struct.S.1]* [[S_ARR]] to %struct.S.1* +// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN8]], [[TMP41]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[TMP33:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK9-NEXT: [[TMP34:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i64 4, i1 false) -// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP32]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done13: -// CHECK9-NEXT: [[TMP35:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP36:%.*]] = bitcast %struct.S.0* [[TMP6]] to i8* -// CHECK9-NEXT: [[TMP37:%.*]] = bitcast %struct.S.0* [[TMP35]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP36]], i8* align 4 [[TMP37]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP40]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[TMP42:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK9-NEXT: [[TMP43:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP42]], i8* align 4 [[TMP43]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP41]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP44:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP45:%.*]] = bitcast %struct.S.1* [[TMP15]] to i8* +// CHECK9-NEXT: [[TMP46:%.*]] = bitcast %struct.S.1* [[TMP44]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP45]], i8* align 4 [[TMP46]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN14]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP38]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done15: +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP47]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: store i32 0, i32* [[F]], align 4 // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1951,6 +2049,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK11-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 @@ -1960,21 +2059,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 // CHECK11-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32* [[T_VAR_ADDR]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP3]], i32* [[SVAR_ADDR]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[T_VAR_ADDR]], i32** [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP7]], %struct.S** [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32* [[SVAR_ADDR]], i32** [[TMP8]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1982,31 +2087,36 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca %struct.S*, align 4 -// CHECK11-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca %struct.S*, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -2016,117 +2126,127 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store %struct.S* [[VAR5]], %struct.S** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP15:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP13]], i32 [[TMP14]], [2 x i32]* [[VEC3]], i32* [[T_VAR2]], [2 x %struct.S]* [[S_ARR4]], %struct.S* [[TMP15]], i32* [[SVAR7]]), !llvm.access.group !6 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP21]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP22]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP23]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[TMP24]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP25]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP27:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: store %struct.S* [[TMP27]], %struct.S** [[TMP26]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store i32* [[SVAR]], i32** [[TMP28]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !6 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK11-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK11-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP24]], i32* [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK11-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP25]], i8* align 4 [[TMP26]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP27:%.*]] = bitcast [2 x %struct.S]* [[S_ARR4]] to %struct.S* -// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN9]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN9]], [[TMP28]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP37]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK11-NEXT: [[TMP39:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP40:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN4]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN4]], [[TMP41]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP27]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[TMP29:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK11-NEXT: [[TMP30:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP29]], i8* align 4 [[TMP30]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP40]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[TMP42:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK11-NEXT: [[TMP43:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP42]], i8* align 4 [[TMP43]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP28]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done10: -// CHECK11-NEXT: [[TMP31:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[TMP5]] to i8* -// CHECK11-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[TMP31]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[SVAR7]], align 4 -// CHECK11-NEXT: store i32 [[TMP34]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP41]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done5: +// CHECK11-NEXT: [[TMP44:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP45:%.*]] = bitcast %struct.S* [[TMP11]] to i8* +// CHECK11-NEXT: [[TMP46:%.*]] = bitcast %struct.S* [[TMP44]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP45]], i8* align 4 [[TMP46]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP47:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP47]], i32* [[TMP10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN11]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP35]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP48]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done12: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done7: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2134,37 +2254,41 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca %struct.S*, align 4 -// CHECK11-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca %struct.S*, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load %struct.S*, %struct.S** [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[TMP13]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP12]], %struct.S** [[TMP]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -2174,113 +2298,113 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store %struct.S* [[VAR5]], %struct.S** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC3]], i32 0, i32 [[TMP17]] -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP18:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 [[TMP19]] -// CHECK11-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX9]] to i8* -// CHECK11-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[TMP18]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false), !llvm.access.group !10 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP27]] +// CHECK11-NEXT: store i32 [[TMP26]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP28:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP29]] +// CHECK11-NEXT: [[TMP30:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* +// CHECK11-NEXT: [[TMP31:%.*]] = bitcast %struct.S* [[TMP28]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i32 4, i1 false), !llvm.access.group !10 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK11-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK11-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK11-NEXT: br i1 [[TMP38]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP29]], i32* [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK11-NEXT: [[TMP31:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP32:%.*]] = bitcast [2 x %struct.S]* [[S_ARR4]] to %struct.S* -// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN11]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN11]], [[TMP33]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP39]], i32* [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK11-NEXT: [[TMP41:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP40]], i8* align 4 [[TMP41]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP10]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP42:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK11-NEXT: [[TMP43:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN6]], [[TMP43]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP32]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK11-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP42]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[TMP44:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK11-NEXT: [[TMP45:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP44]], i8* align 4 [[TMP45]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP33]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done12: -// CHECK11-NEXT: [[TMP36:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP37:%.*]] = bitcast %struct.S* [[TMP7]] to i8* -// CHECK11-NEXT: [[TMP38:%.*]] = bitcast %struct.S* [[TMP36]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP37]], i8* align 4 [[TMP38]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[SVAR7]], align 4 -// CHECK11-NEXT: store i32 [[TMP39]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP43]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP46:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP47:%.*]] = bitcast %struct.S* [[TMP17]] to i8* +// CHECK11-NEXT: [[TMP48:%.*]] = bitcast %struct.S* [[TMP46]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP47]], i8* align 4 [[TMP48]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP49:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP49]], i32* [[TMP14]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP40]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP50]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // @@ -2298,34 +2422,34 @@ // CHECK11-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x i8*], align 4 // CHECK11-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x i8*], align 4 // CHECK11-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x i8*], align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i32 8, i1 false) -// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i32 1 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) -// CHECK11-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[TMP1]], %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i32 1 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK11-NEXT: store %struct.S.1* [[TEST]], %struct.S.1** [[VAR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[TMP1]], %struct.S.1** [[TMP]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_VAR]], align 4 // CHECK11-NEXT: store i32 [[TMP2]], i32* [[T_VAR_CASTED]], align 4 // CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32* // CHECK11-NEXT: store i32 [[TMP3]], i32* [[TMP8]], align 4 @@ -2343,19 +2467,19 @@ // CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 // CHECK11-NEXT: store i8* null, i8** [[TMP16]], align 4 // CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.0]** -// CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP18]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to [2 x %struct.S.1]** +// CHECK11-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP18]], align 4 // CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to [2 x %struct.S.0]** -// CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP20]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = bitcast i8** [[TMP19]] to [2 x %struct.S.1]** +// CHECK11-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP20]], align 4 // CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 // CHECK11-NEXT: store i8* null, i8** [[TMP21]], align 4 // CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK11-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.0** -// CHECK11-NEXT: store %struct.S.0* [[TMP5]], %struct.S.0** [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.S.1** +// CHECK11-NEXT: store %struct.S.1* [[TMP5]], %struct.S.1** [[TMP23]], align 4 // CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK11-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to %struct.S.0** -// CHECK11-NEXT: store %struct.S.0* [[TMP6]], %struct.S.0** [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to %struct.S.1** +// CHECK11-NEXT: store %struct.S.1* [[TMP6]], %struct.S.1** [[TMP25]], align 4 // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 // CHECK11-NEXT: store i8* null, i8** [[TMP26]], align 4 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 @@ -2365,21 +2489,21 @@ // CHECK11-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 // CHECK11-NEXT: br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: -// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49(i32 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[TMP4]]) #[[ATTR4]] +// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49(i32 [[TMP3]], [2 x i32]* [[VEC]], [2 x %struct.S.1]* [[S_ARR]], %struct.S.1* [[TMP4]]) #[[ATTR4]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: // CHECK11-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK11: arraydestroy.done2: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK11-NEXT: ret i32 [[TMP32]] // @@ -2419,397 +2543,421 @@ // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK11-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49 -// CHECK11-SAME: (i32 noundef [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32 noundef [[T_VAR:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.1]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.1]*, align 4 +// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 +// CHECK11-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[S_ARR_ADDR]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[VAR_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32* [[T_VAR_ADDR]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP3]]) +// CHECK11-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[S_ARR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR_ADDR]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[TMP2]], %struct.S.1** [[TMP]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[T_VAR_ADDR]], i32** [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x %struct.S.1]* [[TMP1]], [2 x %struct.S.1]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[TMP7]], %struct.S.1** [[TMP6]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP7]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[TMP8]], %struct.S.1** [[TMP]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: -// CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i32 1 -// CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP9:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP14:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP12]], i32 [[TMP13]], [2 x i32]* [[VEC3]], i32* [[T_VAR2]], [2 x %struct.S.0]* [[S_ARR4]], %struct.S.0* [[TMP14]]), !llvm.access.group !15 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP19]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP20]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP21]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[TMP22]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP23]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP25:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP2]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: store %struct.S.1* [[TMP25]], %struct.S.1** [[TMP24]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !15 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP29]]) +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK11-NEXT: br i1 [[TMP33]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP23]], i32* [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK11-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP24]], i8* align 4 [[TMP25]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP26:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR4]] to %struct.S.0* -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN8]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN8]], [[TMP27]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP34]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK11-NEXT: [[TMP36:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP37:%.*]] = bitcast [2 x %struct.S.1]* [[S_ARR]] to %struct.S.1* +// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN4]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN4]], [[TMP38]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP26]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[TMP28:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK11-NEXT: [[TMP29:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 4, i1 false) -// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done9: -// CHECK11-NEXT: [[TMP30:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = bitcast %struct.S.0* [[TMP4]] to i8* -// CHECK11-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[TMP30]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP31]], i8* align 4 [[TMP32]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP37]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[TMP39:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK11-NEXT: [[TMP40:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP39]], i8* align 4 [[TMP40]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP38]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done5: +// CHECK11-NEXT: [[TMP41:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = bitcast %struct.S.1* [[TMP9]] to i8* +// CHECK11-NEXT: [[TMP43:%.*]] = bitcast %struct.S.1* [[TMP41]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP42]], i8* align 4 [[TMP43]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN10]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP33]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done11: +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP44]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done7: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP11]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[TMP12]], %struct.S.1** [[TMP]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: -// CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i32 1 -// CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP6:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC3]], i32 0, i32 [[TMP16]] -// CHECK11-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP17:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 [[TMP18]] -// CHECK11-NEXT: [[TMP19:%.*]] = bitcast %struct.S.0* [[ARRAYIDX8]] to i8* -// CHECK11-NEXT: [[TMP20:%.*]] = bitcast %struct.S.0* [[TMP17]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i32 4, i1 false), !llvm.access.group !18 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP25]] +// CHECK11-NEXT: store i32 [[TMP24]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP26:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP2]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 [[TMP27]] +// CHECK11-NEXT: [[TMP28:%.*]] = bitcast %struct.S.1* [[ARRAYIDX4]] to i8* +// CHECK11-NEXT: [[TMP29:%.*]] = bitcast %struct.S.1* [[TMP26]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 4, i1 false), !llvm.access.group !18 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK11-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK11-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK11-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP28]], i32* [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK11-NEXT: [[TMP30:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP29]], i8* align 4 [[TMP30]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP31:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR4]] to %struct.S.0* -// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN10]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN10]], [[TMP32]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP37]], i32* [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = bitcast [2 x i32]* [[TMP6]] to i8* +// CHECK11-NEXT: [[TMP39:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP10]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP40:%.*]] = bitcast [2 x %struct.S.1]* [[S_ARR]] to %struct.S.1* +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN6]], [[TMP41]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP31]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[TMP33:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK11-NEXT: [[TMP34:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i32 4, i1 false) -// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP32]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done11: -// CHECK11-NEXT: [[TMP35:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = bitcast %struct.S.0* [[TMP6]] to i8* -// CHECK11-NEXT: [[TMP37:%.*]] = bitcast %struct.S.0* [[TMP35]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 [[TMP37]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP40]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[TMP42:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK11-NEXT: [[TMP43:%.*]] = bitcast %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP42]], i8* align 4 [[TMP43]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP41]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP44:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP45:%.*]] = bitcast %struct.S.1* [[TMP15]] to i8* +// CHECK11-NEXT: [[TMP46:%.*]] = bitcast %struct.S.1* [[TMP44]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP45]], i8* align 4 [[TMP46]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP38]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP47]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: store i32 0, i32* [[F]], align 4 // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp @@ -189,71 +189,84 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l50 // CHECK1-SAME: () #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group !9 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !9 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 2), !llvm.access.group !9 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !9 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !9 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -262,12 +275,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -277,43 +289,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK1-NEXT: invoke void @_Z3foov() @@ -323,27 +339,27 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9:[0-9]+]], !llvm.access.group !13 +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR9:[0-9]+]], !llvm.access.group !13 // CHECK1-NEXT: unreachable // // @@ -359,85 +375,97 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* // CHECK1-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: store i8 [[TMP0]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* -// CHECK1-NEXT: store i8 [[TMP1]], i8* [[CONV1]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP2]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: store i8 [[TMP2]], i8* [[TMP1]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK1-NEXT: store i8 [[TMP2]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !18 -// CHECK1-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group !18 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !18 +// CHECK1-NEXT: [[TMP10:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]), !llvm.access.group !18 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP16]], align 8, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP17]], align 8, !llvm.access.group !18 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !18 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -446,12 +474,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -461,43 +488,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 // CHECK1-NEXT: invoke void @_Z3foov() @@ -507,27 +538,27 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !21 +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR9]], !llvm.access.group !21 // CHECK1-NEXT: unreachable // // @@ -631,71 +662,84 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l36 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 5), !llvm.access.group !24 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !24 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 5), !llvm.access.group !24 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !24 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !24 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -704,12 +748,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -719,43 +762,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 // CHECK1-NEXT: invoke void @_Z3foov() @@ -765,98 +812,111 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !27 +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR9]], !llvm.access.group !27 // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l40 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 23), !llvm.access.group !30 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 23), !llvm.access.group !30 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !30 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !30 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -865,12 +925,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -880,43 +939,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 // CHECK1-NEXT: invoke void @_Z3foov() @@ -926,98 +989,111 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !33 +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR9]], !llvm.access.group !33 // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l36 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 1), !llvm.access.group !36 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !36 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1), !llvm.access.group !36 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !36 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !36 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1026,12 +1102,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1041,43 +1116,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 // CHECK1-NEXT: invoke void @_Z3foov() @@ -1087,27 +1166,27 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !39 +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR9]], !llvm.access.group !39 // CHECK1-NEXT: unreachable // // @@ -1116,18 +1195,17 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 // CHECK1-NEXT: invoke void @_ZN1SC1El(%struct.S* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 noundef 23) // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: // CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i8 @_ZN1ScvcEv(%struct.S* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK1-NEXT: call void @_ZN1SD1Ev(%struct.S* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]] // CHECK1-NEXT: store i8 [[CALL]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP0:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* -// CHECK1-NEXT: store i8 [[TMP0]], i8* [[CONV]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: store i8 [[TMP1]], i8* [[TMP0]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: // CHECK1-NEXT: [[TMP2:%.*]] = landingpad { i8*, i32 } @@ -1138,71 +1216,84 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK1-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK1-NEXT: store i8 [[TMP2]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !42 -// CHECK1-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group !42 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 -// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !42 +// CHECK1-NEXT: [[TMP10:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]), !llvm.access.group !42 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP16]], align 8, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP17]], align 8, !llvm.access.group !42 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !42 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1211,12 +1302,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1226,43 +1316,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 // CHECK1-NEXT: invoke void @_Z3foov() @@ -1272,27 +1366,27 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null -// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !45 +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR9]], !llvm.access.group !45 // CHECK1-NEXT: unreachable // // @@ -1766,71 +1860,84 @@ // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l50 // CHECK5-SAME: () #[[ATTR3:[0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group !9 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !9 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 2), !llvm.access.group !9 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !9 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !9 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1839,12 +1946,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1854,43 +1960,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK5-NEXT: invoke void @_Z3foov() @@ -1900,27 +2010,27 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9:[0-9]+]], !llvm.access.group !13 +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR9:[0-9]+]], !llvm.access.group !13 // CHECK5-NEXT: unreachable // // @@ -1936,85 +2046,97 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* // CHECK5-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK5-NEXT: store i8 [[TMP0]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* -// CHECK5-NEXT: store i8 [[TMP1]], i8* [[CONV1]], align 1 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64 [[TMP2]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: store i8 [[TMP2]], i8* [[TMP1]], align 1 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK5-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK5-NEXT: store i8 [[TMP2]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !18 -// CHECK5-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group !18 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !18 +// CHECK5-NEXT: [[TMP10:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]), !llvm.access.group !18 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK5-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP16]], align 8, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP17]], align 8, !llvm.access.group !18 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !18 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK5-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK5-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2023,12 +2145,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2038,43 +2159,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 // CHECK5-NEXT: invoke void @_Z3foov() @@ -2084,27 +2209,27 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !21 +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR9]], !llvm.access.group !21 // CHECK5-NEXT: unreachable // // @@ -2199,71 +2324,84 @@ // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l36 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 5), !llvm.access.group !24 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !24 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 5), !llvm.access.group !24 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !24 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !24 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2272,12 +2410,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2287,43 +2424,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 // CHECK5-NEXT: invoke void @_Z3foov() @@ -2333,98 +2474,111 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !27 +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR9]], !llvm.access.group !27 // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l40 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*)) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 23), !llvm.access.group !30 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 23), !llvm.access.group !30 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !30 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !30 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !30 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !30 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !30 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2433,12 +2587,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2448,43 +2601,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 // CHECK5-NEXT: invoke void @_Z3foov() @@ -2494,98 +2651,111 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !33 +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR9]], !llvm.access.group !33 // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l36 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*)) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 1), !llvm.access.group !36 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !36 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1), !llvm.access.group !36 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !36 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !36 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !36 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !36 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !36 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2594,12 +2764,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2609,43 +2778,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 // CHECK5-NEXT: invoke void @_Z3foov() @@ -2655,27 +2828,27 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !39 +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR9]], !llvm.access.group !39 // CHECK5-NEXT: unreachable // // @@ -2684,18 +2857,17 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 // CHECK5-NEXT: invoke void @_ZN1SC1El(%struct.S* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 noundef 23) // CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK5: invoke.cont: // CHECK5-NEXT: [[CALL:%.*]] = call noundef signext i8 @_ZN1ScvcEv(%struct.S* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK5-NEXT: call void @_ZN1SD1Ev(%struct.S* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]] // CHECK5-NEXT: store i8 [[CALL]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[TMP0:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* -// CHECK5-NEXT: store i8 [[TMP0]], i8* [[CONV]], align 1 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: store i8 [[TMP1]], i8* [[TMP0]], align 1 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: // CHECK5-NEXT: [[TMP2:%.*]] = landingpad { i8*, i32 } @@ -2706,71 +2878,84 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 +// CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK5-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK5-NEXT: store i8 [[TMP2]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !42 -// CHECK5-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group !42 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 -// CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !42 +// CHECK5-NEXT: [[TMP10:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !42 +// CHECK5-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]), !llvm.access.group !42 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !42 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK5-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK5-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !42 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP16]], align 8, !llvm.access.group !42 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP17]], align 8, !llvm.access.group !42 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !42 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK5-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK5-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2779,12 +2964,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2794,43 +2978,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 // CHECK5-NEXT: invoke void @_Z3foov() @@ -2840,27 +3028,27 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !45 +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { i8*, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP19]]) #[[ATTR9]], !llvm.access.group !45 // CHECK5-NEXT: unreachable // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp @@ -278,29 +278,36 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 @@ -316,62 +323,68 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !5 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i64 2 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -381,12 +394,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -401,14 +413,18 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 @@ -424,77 +440,77 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM3]] -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 4, i1 false), !llvm.access.group !9 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* +// CHECK1-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false), !llvm.access.group !9 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD5]], i32* [[SIVAR]], align 4, !llvm.access.group !9 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP22]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP27]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] @@ -507,23 +523,23 @@ // CHECK1-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) -// CHECK1-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 8 -// CHECK1-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) +// CHECK1-NEXT: store %struct.S.1* [[TEST]], %struct.S.1** [[VAR]], align 8 +// CHECK1-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 8 // CHECK1-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 2) // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0) // CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 @@ -533,306 +549,322 @@ // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done2: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK1-NEXT: ret i32 [[TMP4]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) +// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK1-SAME: () #[[ATTR4]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[_TMP2:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: -// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !14 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !14 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN4]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN4]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done5: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[_TMP3:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: -// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP3]], align 8, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[ARRAYIDX6]] to i8* -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i64 4, i1 false), !llvm.access.group !17 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP17:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP3]], align 8, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: [[TMP19:%.*]] = bitcast %struct.S.1* [[ARRAYIDX6]] to i8* +// CHECK1-NEXT: [[TMP20:%.*]] = bitcast %struct.S.1* [[TMP17]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i64 4, i1 false), !llvm.access.group !17 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK1-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN8]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN8]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP21]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP26]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done9: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load volatile i32, i32* @g, align 4 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load volatile i32, i32* @g, align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -841,11 +873,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK1-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -999,29 +1031,36 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98 // CHECK3-SAME: () #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 @@ -1037,60 +1076,66 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group !6 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP10]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP11]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i32 2 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP15]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -1100,12 +1145,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1120,14 +1164,18 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 @@ -1141,75 +1189,75 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP12]] -// CHECK3-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[ARRAYIDX2]] to i8* -// CHECK3-NEXT: [[TMP14:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 4, i1 false), !llvm.access.group !10 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP17]] +// CHECK3-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX2]] to i8* +// CHECK3-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false), !llvm.access.group !10 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] // CHECK3-NEXT: store i32 [[ADD3]], i32* [[SIVAR]], align 4, !llvm.access.group !10 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN5]], i32 2 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN5]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP22]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP27]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] @@ -1222,23 +1270,23 @@ // CHECK3-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK3-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i32 8, i1 false) -// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i32 1 -// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) -// CHECK3-NEXT: store %struct.S.0* [[TEST]], %struct.S.0** [[VAR]], align 4 -// CHECK3-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 4 +// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i32 1 +// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK3-NEXT: store %struct.S.1* [[TEST]], %struct.S.1** [[VAR]], align 4 +// CHECK3-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 4 // CHECK3-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 2) // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0) // CHECK3-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 @@ -1248,300 +1296,316 @@ // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK3: arraydestroy.done2: -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK3-NEXT: ret i32 [[TMP4]] // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK3-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK3-SAME: () #[[ATTR4]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[_TMP2:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 4 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK3: arrayctor.loop: -// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i32 1 -// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i32 1 +// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: -// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK3-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group !15 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP10]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP11]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !15 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN4]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN4]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP15]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK3: arraydestroy.done5: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[_TMP2:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 4 +// CHECK3-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: store %struct.S.1* undef, %struct.S.1** [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK3: arrayctor.loop: -// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i32 1 -// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i32 1 +// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: -// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK3-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[ARRAYIDX4]] to i8* -// CHECK3-NEXT: [[TMP15:%.*]] = bitcast %struct.S.0* [[TMP12]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP14]], i8* align 4 [[TMP15]], i32 4, i1 false), !llvm.access.group !18 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP17:%.*]] = load %struct.S.1*, %struct.S.1** [[_TMP2]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 [[TMP18]] +// CHECK3-NEXT: [[TMP19:%.*]] = bitcast %struct.S.1* [[ARRAYIDX4]] to i8* +// CHECK3-NEXT: [[TMP20:%.*]] = bitcast %struct.S.1* [[TMP17]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i32 4, i1 false), !llvm.access.group !18 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK3-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN6]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP21]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP26]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK3: arraydestroy.done7: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load volatile i32, i32* @g, align 4 // CHECK3-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load volatile i32, i32* @g, align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1550,11 +1614,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK3-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK3-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -2464,18 +2528,20 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK9-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 @@ -2483,61 +2549,72 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[G:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[G1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP2:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32* undef, i32** [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK9-NEXT: store i32* [[G1]], i32** [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2546,12 +2623,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 @@ -2564,77 +2640,81 @@ // CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK9-NEXT: store i32* undef, i32** [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK9-NEXT: store i32* [[G1]], i32** [[_TMP3]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 // CHECK9-NEXT: store i32 1, i32* [[G]], align 4, !llvm.access.group !8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP3]], align 8, !llvm.access.group !8 -// CHECK9-NEXT: store volatile i32 1, i32* [[TMP10]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32*, i32** [[_TMP3]], align 8, !llvm.access.group !8 +// CHECK9-NEXT: store volatile i32 1, i32* [[TMP15]], align 4, !llvm.access.group !8 // CHECK9-NEXT: store i32 2, i32* [[SIVAR]], align 4, !llvm.access.group !8 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store i32* [[G]], i32** [[TMP11]], align 8, !llvm.access.group !8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[_TMP3]], align 8, !llvm.access.group !8 -// CHECK9-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8, !llvm.access.group !8 -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[TMP14]], align 8, !llvm.access.group !8 -// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group !8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[G]], i32** [[TMP16]], align 8, !llvm.access.group !8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32*, i32** [[_TMP3]], align 8, !llvm.access.group !8 +// CHECK9-NEXT: store i32* [[TMP18]], i32** [[TMP17]], align 8, !llvm.access.group !8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[TMP19]], align 8, !llvm.access.group !8 +// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group !8 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK9-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp @@ -82,71 +82,84 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l36 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 4), !llvm.access.group !6 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !6 +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 4), !llvm.access.group !6 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !6 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !6 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -155,12 +168,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -170,60 +182,64 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -234,71 +250,84 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l39 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 3), !llvm.access.group !15 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 3), !llvm.access.group !15 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -307,12 +336,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -322,60 +350,64 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -401,71 +433,84 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l29 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group !21 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !21 +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 2), !llvm.access.group !21 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !21 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -474,12 +519,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -489,60 +533,64 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp @@ -137,209 +137,227 @@ // CHECK1-SAME: (i64 noundef [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[SIVAR1]], align 4 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i32* [[SIVAR1]]), !llvm.access.group !5 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[SIVAR]], i32** [[TMP16]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i32* [[SIVAR1]] to i8* -// CHECK1-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP18]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP22:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP23]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP22]] monotonic, align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP27]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SIVAR2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[SIVAR2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 0, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[SIVAR2]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[SIVAR2]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[SIVAR]], align 4, !llvm.access.group !9 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i32* [[SIVAR2]] to i8* -// CHECK1-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP18]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, i8* [[TMP24]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR2]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: store i32 [[ADD6]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[SIVAR2]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP22]] monotonic, align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = atomicrmw add i32* [[TMP6]], i32 [[TMP28]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -435,209 +453,227 @@ // CHECK1-SAME: (i64 noundef [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[T_VAR1]], align 4 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i32* [[T_VAR1]]), !llvm.access.group !14 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[TMP16]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !14 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i32* [[T_VAR1]] to i8* -// CHECK1-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP18]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP22:%.*]] = bitcast i32* [[T_VAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP23]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP22]] monotonic, align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP27]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[T_VAR2]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[T_VAR]], align 4, !llvm.access.group !17 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i32* [[T_VAR2]] to i8* -// CHECK1-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP18]], void (i8*, i8*)* @.omp.reduction.reduction_func.5, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i32* [[T_VAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, i8* [[TMP24]], void (i8*, i8*)* @.omp.reduction.reduction_func.5, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: store i32 [[ADD6]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP22]] monotonic, align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = atomicrmw add i32* [[TMP6]], i32 [[TMP28]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -737,204 +773,222 @@ // CHECK3-SAME: (i32 noundef [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[SIVAR]], i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[SIVAR_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[SIVAR_ADDR]], i32** [[TMP0]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[SIVAR1]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], i32* [[SIVAR1]]), !llvm.access.group !6 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[TMP14]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = bitcast i32* [[SIVAR1]] to i8* -// CHECK3-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP20:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK3-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i32 4, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP25]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: store i32 0, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[SIVAR1]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[SIVAR1]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[SIVAR]], align 4, !llvm.access.group !10 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP17:%.*]] = bitcast i32* [[SIVAR1]] to i8* -// CHECK3-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, i8* [[TMP18]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP23:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], i32 1, i32 4, i8* [[TMP24]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[TMP6]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP22]] monotonic, align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = atomicrmw add i32* [[TMP6]], i32 [[TMP28]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -1029,204 +1083,222 @@ // CHECK3-SAME: (i32 noundef [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* [[T_VAR_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[T_VAR_ADDR]], i32** [[TMP0]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[T_VAR1]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], i32* [[T_VAR1]]), !llvm.access.group !15 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[T_VAR]], i32** [[TMP14]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !15 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = bitcast i32* [[T_VAR1]] to i8* -// CHECK3-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 4, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP20:%.*]] = bitcast i32* [[T_VAR]] to i8* +// CHECK3-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, i8* [[TMP21]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP25]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: store i32 0, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[T_VAR1]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[T_VAR1]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[T_VAR]], align 4, !llvm.access.group !18 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP17:%.*]] = bitcast i32* [[T_VAR1]] to i8* -// CHECK3-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, i8* [[TMP18]], void (i8*, i8*)* @.omp.reduction.reduction_func.5, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP23:%.*]] = bitcast i32* [[T_VAR]] to i8* +// CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], i32 1, i32 4, i8* [[TMP24]], void (i8*, i8*)* @.omp.reduction.reduction_func.5, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[TMP6]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP22]] monotonic, align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = atomicrmw add i32* [[TMP6]], i32 [[TMP28]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -1501,213 +1573,231 @@ // CHECK9-SAME: (i64 noundef [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[SIVAR1]], align 4 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[SIVAR]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i32* [[SIVAR1]]), !llvm.access.group !4 +// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[TMP16]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK9-NEXT: [[TMP17:%.*]] = bitcast i32* [[SIVAR1]] to i8* -// CHECK9-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK9-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP18]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP22:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK9-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK9-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP23]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: store i32 [[ADD2]], i32* [[TMP2]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.case2: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP22]] monotonic, align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP27]] monotonic, align 4 // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.default: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[SIVAR2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: store i32 0, i32* [[SIVAR2]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK9-NEXT: store i32 0, i32* [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[SIVAR2]], align 4, !llvm.access.group !8 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[SIVAR2]], align 4, !llvm.access.group !8 -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store i32* [[SIVAR2]], i32** [[TMP13]], align 8, !llvm.access.group !8 -// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]), !llvm.access.group !8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[SIVAR]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[TMP19]], align 8, !llvm.access.group !8 +// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]), !llvm.access.group !8 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK9-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK9-NEXT: [[TMP18:%.*]] = bitcast i32* [[SIVAR2]] to i8* -// CHECK9-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 8 -// CHECK9-NEXT: [[TMP19:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK9-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP19]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP24:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK9-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK9-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, i8* [[TMP25]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: switch i32 [[TMP26]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[SIVAR2]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK9-NEXT: store i32 [[ADD6]], i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK9-NEXT: store i32 [[ADD5]], i32* [[TMP6]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.case2: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR2]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP23]] monotonic, align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = atomicrmw add i32* [[TMP6]], i32 [[TMP29]] monotonic, align 4 // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.default: // CHECK9-NEXT: ret void diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp @@ -357,75 +357,91 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !8 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -434,13 +450,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -450,67 +464,71 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -522,75 +540,91 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !17 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 8, !llvm.access.group !17 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !17 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -599,13 +633,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -615,67 +647,71 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !20 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !20 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -687,75 +723,91 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !23 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 8, !llvm.access.group !23 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !23 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -764,13 +816,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -780,88 +830,92 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[CONV2]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP13]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -873,75 +927,91 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !29 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 8, !llvm.access.group !29 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !29 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -950,13 +1020,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -966,55 +1034,59 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -1022,9 +1094,9 @@ // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1036,75 +1108,91 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !35 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !35 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1113,13 +1201,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1129,55 +1215,59 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !38 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !38 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !38 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -1185,9 +1275,9 @@ // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1341,75 +1431,91 @@ // CHECK2-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK2-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK2-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !8 -// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !8 -// CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !8 +// CHECK2-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !8 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !8 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK2-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 123, i32* [[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1418,13 +1524,11 @@ // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1434,67 +1538,71 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK2-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK2-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK2-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK2-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 123, i32* [[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1506,75 +1614,91 @@ // CHECK2-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK2-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK2-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 -// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !17 -// CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !17 +// CHECK2-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 8, !llvm.access.group !17 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !17 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !17 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK2-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 123, i32* [[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1583,13 +1707,11 @@ // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1599,67 +1721,71 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK2-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK2-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK2-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 -// CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 +// CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !20 -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !20 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK2-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 123, i32* [[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1671,75 +1797,91 @@ // CHECK2-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK2-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK2-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 -// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 -// CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !23 +// CHECK2-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !23 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !23 +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !23 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !23 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 8, !llvm.access.group !23 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !23 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK2-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 123, i32* [[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1748,13 +1890,11 @@ // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1764,88 +1904,92 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK2-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK2-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK2-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK2-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK2-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[CONV2]] // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK2-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP13]] to i32 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK2-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !26 -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !26 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !26 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK2-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK2-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 123, i32* [[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1857,75 +2001,91 @@ // CHECK2-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK2-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK2-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 -// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 -// CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !29 +// CHECK2-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !29 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK2-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !29 +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !29 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !29 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 8, !llvm.access.group !29 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !29 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK2-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 123, i32* [[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1934,13 +2094,11 @@ // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1950,55 +2108,59 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK2-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK2-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK2-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK2-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK2-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !32 -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !32 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK2: omp.inner.for.end: @@ -2006,9 +2168,9 @@ // CHECK2: omp.dispatch.inc: // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK2-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK2-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 123, i32* [[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2020,75 +2182,91 @@ // CHECK2-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK2-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK2-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 -// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 -// CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !35 +// CHECK2-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !35 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 +// CHECK2-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !35 +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !35 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !35 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP16]], align 8, !llvm.access.group !35 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !35 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK2-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 123, i32* [[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2097,13 +2275,11 @@ // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2113,55 +2289,59 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK2-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK2-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK2-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK2-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK2-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !38 -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !38 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !38 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !38 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK2: omp.inner.for.end: @@ -2169,9 +2349,9 @@ // CHECK2: omp.dispatch.inc: // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK2-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK2-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 123, i32* [[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2325,73 +2505,89 @@ // CHECK5-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK5-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !9 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP14]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !9 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK5-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK5-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2400,13 +2596,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2416,64 +2610,68 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK5-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP17]] // CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK5-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2485,73 +2683,89 @@ // CHECK5-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK5-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !18 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP14]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !18 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK5-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK5-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2560,13 +2774,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2576,64 +2788,68 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK5-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP17]] // CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK5-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2645,73 +2861,89 @@ // CHECK5-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK5-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !24 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP14]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !24 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK5-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK5-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2720,13 +2952,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2736,83 +2966,87 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK5-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP7]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !27 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP15]] +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !27 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP21]] // CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK5-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK5-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK5-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2824,73 +3058,89 @@ // CHECK5-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK5-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !30 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP14]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !30 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK5-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK5-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2899,13 +3149,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2915,52 +3163,56 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK5-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK5-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !33 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !33 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP18]] // CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -2968,9 +3220,9 @@ // CHECK5: omp.dispatch.inc: // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK5-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2982,73 +3234,89 @@ // CHECK5-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK5-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !36 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5: omp.inner.for.body: +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP14]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !36 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK5-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK5-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3057,13 +3325,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3073,52 +3339,56 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK5-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK5-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !39 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !39 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP18]] // CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !39 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -3126,9 +3396,9 @@ // CHECK5: omp.dispatch.inc: // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK5-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, i32* [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3282,73 +3552,89 @@ // CHECK6-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK6-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK6-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK6-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK6-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 -// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !9 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP14]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !9 // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 -// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK6-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK6-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK6: .omp.final.then: // CHECK6-NEXT: store i32 123, i32* [[I]], align 4 // CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3357,13 +3643,11 @@ // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK6-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3373,64 +3657,68 @@ // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK6-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK6-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 -// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] +// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP17]] // CHECK6-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK6-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK6-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK6-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK6-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK6-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK6: .omp.final.then: // CHECK6-NEXT: store i32 123, i32* [[I]], align 4 // CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3442,73 +3730,89 @@ // CHECK6-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK6-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK6-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK6-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK6-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 -// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !18 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP14]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !18 // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 -// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK6-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK6-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK6: .omp.final.then: // CHECK6-NEXT: store i32 123, i32* [[I]], align 4 // CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3517,13 +3821,11 @@ // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK6-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3533,64 +3835,68 @@ // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK6-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK6-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 -// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 -// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] +// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP17]] // CHECK6-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK6-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK6-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK6-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK6-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK6: .omp.final.then: // CHECK6-NEXT: store i32 123, i32* [[I]], align 4 // CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3602,73 +3908,89 @@ // CHECK6-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK6-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK6-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK6-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK6-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !24 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP14]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !24 // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 -// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK6-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK6-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK6: .omp.final.then: // CHECK6-NEXT: store i32 123, i32* [[I]], align 4 // CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3677,13 +3999,11 @@ // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK6-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3693,83 +4013,87 @@ // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK6-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK6-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK6-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ [[TMP7]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 -// CHECK6-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK6-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK6-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 -// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !27 -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP15]] +// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !27 +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP21]] // CHECK6-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK6-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: -// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK6-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK6-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK6-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK6-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK6: omp.dispatch.end: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK6-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK6-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK6-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK6-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK6: .omp.final.then: // CHECK6-NEXT: store i32 123, i32* [[I]], align 4 // CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3781,73 +4105,89 @@ // CHECK6-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK6-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK6-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK6-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK6-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 -// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !30 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 +// CHECK6-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !30 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK6-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !30 +// CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4, !llvm.access.group !30 +// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4, !llvm.access.group !30 +// CHECK6-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP14]], align 4, !llvm.access.group !30 +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !30 // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 -// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 +// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK6-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK6-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK6: .omp.final.then: // CHECK6-NEXT: store i32 123, i32* [[I]], align 4 // CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3856,13 +4196,11 @@ // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK6-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3872,52 +4210,56 @@ // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK6-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK6-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK6-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK6-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: -// CHECK6-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK6-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK6-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 -// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !33 -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] +// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !33 +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP18]] // CHECK6-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK6: omp.inner.for.end: @@ -3925,9 +4267,9 @@ // CHECK6: omp.dispatch.inc: // CHECK6-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK6: omp.dispatch.end: -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK6-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK6-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK6: .omp.final.then: // CHECK6-NEXT: store i32 123, i32* [[I]], align 4 // CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3939,73 +4281,89 @@ // CHECK6-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK6-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK6-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 4 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK6-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK6-NEXT: store i32 122, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 -// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !36 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 +// CHECK6-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !36 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK6-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !36 +// CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4, !llvm.access.group !36 +// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4, !llvm.access.group !36 +// CHECK6-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP14]], align 4, !llvm.access.group !36 +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !36 // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 -// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 +// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK6-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK6-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK6: .omp.final.then: // CHECK6-NEXT: store i32 123, i32* [[I]], align 4 // CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4014,13 +4372,11 @@ // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK6-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 4 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4030,52 +4386,56 @@ // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK6-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load %struct.SS*, %struct.SS** [[TMP5]], align 4 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK6-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK6-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK6-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: -// CHECK6-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK6-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK6-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 -// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !39 -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] +// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP6]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !39 +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP18]] // CHECK6-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !39 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK6: omp.inner.for.end: @@ -4083,9 +4443,9 @@ // CHECK6: omp.dispatch.inc: // CHECK6-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK6: omp.dispatch.end: -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK6-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK6-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK6: .omp.final.then: // CHECK6-NEXT: store i32 123, i32* [[I]], align 4 // CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4857,24 +5217,29 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i32* [[CONV]], i32** [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], i64* [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4884,81 +5249,98 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 -// CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 -// CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !13 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP24]], align 8, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP25]], align 8, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 8, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[TMP27]], align 8, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store i32* [[TMP6]], i32** [[TMP28]], align 8, !llvm.access.group !13 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !13 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK13-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK13-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK13-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -4971,15 +5353,11 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4992,90 +5370,94 @@ // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !17 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !17 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !17 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK13-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK13-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK13-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -5093,24 +5475,29 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i32* [[CONV]], i32** [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], i64* [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5120,81 +5507,98 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 -// CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 -// CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !22 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP24]], align 8, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP25]], align 8, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 8, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[TMP27]], align 8, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store i32* [[TMP6]], i32** [[TMP28]], align 8, !llvm.access.group !22 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !22 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK13-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK13-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK13-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -5207,15 +5611,11 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5228,90 +5628,94 @@ // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !25 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !25 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK13-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK13-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK13-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -5331,7 +5735,7 @@ // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK13-NEXT: store i64 [[M]], i64* [[M_ADDR]], align 8 // CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -5342,23 +5746,26 @@ // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]]) +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i32* [[CONV1]], i32** [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -5368,117 +5775,133 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]]), !llvm.access.group !28 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK13-NEXT: store i64 [[TMP24]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK13-NEXT: store i64 [[TMP26]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP27]], align 8, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP28]], align 8, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i32* [[TMP2]], i32** [[TMP29]], align 8, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[TMP30]], align 8, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store i32* [[TMP6]], i32** [[TMP31]], align 8, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: store i32 [[TMP33]], i32* [[TMP32]], align 8, !llvm.access.group !28 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !28 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] -// CHECK13-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] -// CHECK13-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] -// CHECK13: cond.true12: -// CHECK13-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: br label [[COND_END14:%.*]] -// CHECK13: cond.false13: -// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: br label [[COND_END14]] -// CHECK13: cond.end14: -// CHECK13-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE12]] ], [ [[TMP32]], [[COND_FALSE13]] ] -// CHECK13-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] +// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] +// CHECK13-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] +// CHECK13: cond.true11: +// CHECK13-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: br label [[COND_END13:%.*]] +// CHECK13: cond.false12: +// CHECK13-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: br label [[COND_END13]] +// CHECK13: cond.end13: +// CHECK13-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE11]] ], [ [[TMP43]], [[COND_FALSE12]] ] +// CHECK13-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: store i32 [[TMP44]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) -// CHECK13-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 -// CHECK13-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP46]]) +// CHECK13-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0 +// CHECK13-NEXT: br i1 [[TMP48]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB16:%.*]] = sub nsw i32 [[TMP38]], 0 -// CHECK13-NEXT: [[DIV17:%.*]] = sdiv i32 [[SUB16]], 1 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV17]], 1 -// CHECK13-NEXT: [[ADD18:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD18]], i32* [[I4]], align 4 +// CHECK13-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP49]], 0 +// CHECK13-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 +// CHECK13-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] +// CHECK13-NEXT: store i32 [[ADD17]], i32* [[I4]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: br label [[OMP_PRECOND_END]] @@ -5487,16 +5910,12 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -5506,99 +5925,104 @@ // CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 8 +// CHECK13-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK13-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK13-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 +// CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] -// CHECK13-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] +// CHECK13-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 -// CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 +// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !31 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !31 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !31 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !31 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !31 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK13-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK13-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK13-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 -// CHECK13-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 -// CHECK13-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK13-NEXT: store i32 [[ADD13]], i32* [[I6]], align 4 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP35]], 0 +// CHECK13-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 +// CHECK13-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 +// CHECK13-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK13-NEXT: store i32 [[ADD12]], i32* [[I5]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: br label [[OMP_PRECOND_END]] @@ -5612,24 +6036,29 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i32* [[CONV]], i32** [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], i64* [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5639,81 +6068,98 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 -// CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 -// CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !34 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !34 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !34 +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP24]], align 8, !llvm.access.group !34 +// CHECK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP25]], align 8, !llvm.access.group !34 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 8, !llvm.access.group !34 +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[TMP27]], align 8, !llvm.access.group !34 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store i32* [[TMP6]], i32** [[TMP28]], align 8, !llvm.access.group !34 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !34 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK13-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK13-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK13-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -5726,15 +6172,11 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5747,73 +6189,77 @@ // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP20]], i32 35, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP22]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !37 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !37 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !37 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !37 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK13-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -5821,12 +6267,12 @@ // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK13-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK13-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK13-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK13-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK13-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -5846,7 +6292,7 @@ // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK13-NEXT: store i64 [[M]], i64* [[M_ADDR]], align 8 // CHECK13-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -5857,23 +6303,26 @@ // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]]) +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i32* [[CONV1]], i32** [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -5883,92 +6332,108 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !40 -// CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 -// CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !40 -// CHECK13-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP20]], i32* [[CONV7]], align 4, !llvm.access.group !40 -// CHECK13-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP21]]), !llvm.access.group !40 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK13-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP26]], align 8, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP27]], align 8, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i32* [[TMP2]], i32** [[TMP28]], align 8, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], i64* [[TMP29]], align 8, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store i32* [[TMP6]], i32** [[TMP30]], align 8, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: store i32 [[TMP32]], i32* [[TMP31]], align 8, !llvm.access.group !40 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !40 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !40 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK13-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK13-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK13-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP28]], 0 -// CHECK13-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV9]], 1 -// CHECK13-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD10]], i32* [[I4]], align 4 +// CHECK13-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP39]], 0 +// CHECK13-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] +// CHECK13-NEXT: store i32 [[ADD9]], i32* [[I4]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: br label [[OMP_PRECOND_END]] @@ -5977,16 +6442,12 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -5996,96 +6457,101 @@ // CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 8 +// CHECK13-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK13-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK13-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 +// CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 [[TMP9]]) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP23]], i32 35, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK13-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP25]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 -// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] +// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !43 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !43 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !43 +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !43 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !43 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK13-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK13-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK13-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 -// CHECK13-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 -// CHECK13-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] -// CHECK13-NEXT: store i32 [[ADD12]], i32* [[I6]], align 4 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP35]], 0 +// CHECK13-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 +// CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 +// CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] +// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I5]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: br label [[OMP_PRECOND_END]] @@ -6244,75 +6710,91 @@ // CHECK13-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.12*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !46 +// CHECK13-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !46 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !46 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !46 +// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !46 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 8, !llvm.access.group !46 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.13*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !46 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK13-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK13-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6321,13 +6803,11 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6337,66 +6817,70 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !49 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !49 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !49 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK13-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK13-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6408,75 +6892,91 @@ // CHECK13-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !52 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !52 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !52 +// CHECK13-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !52 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !52 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !52 +// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !52 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 8, !llvm.access.group !52 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.15*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !52 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !52 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !52 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK13-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK13-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6485,13 +6985,11 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.15* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.15*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6501,66 +6999,70 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.15* [[__CONTEXT]], %struct.anon.15** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.15*, %struct.anon.15** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], %struct.anon.15* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !55 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !55 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !55 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !55 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !55 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !55 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK13-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK13-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6574,92 +7076,105 @@ // CHECK13-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 8 // CHECK13-NEXT: store i64 [[M]], i64* [[M_ADDR]], align 8 // CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.16*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.16* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.16*, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK13-NEXT: store %struct.anon.16* [[__CONTEXT]], %struct.anon.16** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], %struct.anon.16* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK13-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 -// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !58 -// CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58 -// CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !58 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !58 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 +// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK13-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !58 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP16]], align 8, !llvm.access.group !58 +// CHECK13-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP17]], align 8, !llvm.access.group !58 +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 8, !llvm.access.group !58 +// CHECK13-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !58 +// CHECK13-NEXT: store i32 [[TMP20]], i32* [[TMP19]], align 8, !llvm.access.group !58 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.18*)* @.omp_outlined..27 to void (i32*, i32*, ...)*), %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !58 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK13-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6668,14 +7183,12 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.18* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.18*, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6685,90 +7198,95 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK13-NEXT: store %struct.anon.18* [[__CONTEXT]], %struct.anon.18** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.18*, %struct.anon.18** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], %struct.anon.18* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK13-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[CONV2]] // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK13-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !61 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !61 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !61 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !61 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK13-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK13-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP13]]) +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK13-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6780,75 +7298,91 @@ // CHECK13-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 8 // CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.20*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.20* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.20*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.20* [[__CONTEXT]], %struct.anon.20** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.20*, %struct.anon.20** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], %struct.anon.20* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !64 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !64 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !64 +// CHECK13-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !64 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !64 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !64 +// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !64 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 8, !llvm.access.group !64 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.21*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !64 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !64 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !64 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP65:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK13-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK13-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6857,13 +7391,11 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.21* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.21*, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6873,54 +7405,58 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK13-NEXT: store %struct.anon.21* [[__CONTEXT]], %struct.anon.21** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.21*, %struct.anon.21** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], %struct.anon.21* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK13-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !67 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !67 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !67 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !67 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !67 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !67 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 -// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 +// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK13-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP68:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -6928,9 +7464,9 @@ // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK13-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK13-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6944,92 +7480,105 @@ // CHECK13-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 8 // CHECK13-NEXT: store i64 [[M]], i64* [[M_ADDR]], align 8 // CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 8 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.22*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.22* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.22*, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 8 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK13-NEXT: store %struct.anon.22* [[__CONTEXT]], %struct.anon.22** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.22*, %struct.anon.22** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], %struct.anon.22* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK13-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !70 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 -// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !70 -// CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70 -// CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !70 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !70 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !70 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 +// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK13-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !70 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP16]], align 8, !llvm.access.group !70 +// CHECK13-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP17]], align 8, !llvm.access.group !70 +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 8, !llvm.access.group !70 +// CHECK13-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !70 +// CHECK13-NEXT: store i32 [[TMP20]], i32* [[TMP19]], align 8, !llvm.access.group !70 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.24*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !70 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !70 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !70 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP71:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK13-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7038,14 +7587,12 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.24* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.24*, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7055,67 +7602,72 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK13-NEXT: store %struct.anon.24* [[__CONTEXT]], %struct.anon.24** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load %struct.anon.24*, %struct.anon.24** [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_24:%.*]], %struct.anon.24* [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK13-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK13-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 35, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32 35, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK13-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !73 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !73 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !73 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !73 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !73 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !73 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 +// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK13-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP74:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK13-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK13-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, i32* [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7502,24 +8054,29 @@ // CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK14-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK14-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK14-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store i32* [[CONV]], i32** [[TMP2]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store i64 [[TMP0]], i64* [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 8 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK14-NEXT: ret void // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7529,81 +8086,98 @@ // CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK14-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK14-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK14-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK14-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK14-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK14-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK14-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK14: omp.precond.then: // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK14-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 -// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 -// CHECK14-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 -// CHECK14-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !13 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 +// CHECK14-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK14-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !13 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 +// CHECK14-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK14-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !13 +// CHECK14-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP24]], align 8, !llvm.access.group !13 +// CHECK14-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP25]], align 8, !llvm.access.group !13 +// CHECK14-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 8, !llvm.access.group !13 +// CHECK14-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK14-NEXT: store i64 [[TMP4]], i64* [[TMP27]], align 8, !llvm.access.group !13 +// CHECK14-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK14-NEXT: store i32* [[TMP6]], i32** [[TMP28]], align 8, !llvm.access.group !13 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !13 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK14-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK14-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK14-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK14-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK14-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK14-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: -// CHECK14-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK14-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK14-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -7616,15 +8190,11 @@ // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7637,90 +8207,94 @@ // CHECK14-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK14-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK14-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK14-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK14-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK14-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK14-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK14-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK14-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK14: omp.precond.then: // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK14-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK14-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK14-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK14-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK14-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 -// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK14-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK14-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK14-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK14-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !17 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !17 -// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK14-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !17 +// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK14-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK14-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK14-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK14-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: -// CHECK14-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK14-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK14-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -7738,24 +8312,29 @@ // CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK14-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK14-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK14-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store i32* [[CONV]], i32** [[TMP2]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store i64 [[TMP0]], i64* [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 8 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK14-NEXT: ret void // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7765,81 +8344,98 @@ // CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK14-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK14-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK14-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK14-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK14-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK14-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK14-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK14: omp.precond.then: // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK14-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 -// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 -// CHECK14-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 -// CHECK14-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !22 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK14-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK14-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP24]], align 8, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP25]], align 8, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 8, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK14-NEXT: store i64 [[TMP4]], i64* [[TMP27]], align 8, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK14-NEXT: store i32* [[TMP6]], i32** [[TMP28]], align 8, !llvm.access.group !22 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !22 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK14-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK14-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK14-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK14-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK14-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: -// CHECK14-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK14-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK14-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -7852,15 +8448,11 @@ // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7873,90 +8465,94 @@ // CHECK14-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK14-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK14-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK14-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK14-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK14-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK14-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK14-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK14-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK14: omp.precond.then: // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK14-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK14-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK14-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK14-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK14-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 -// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK14-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK14-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 +// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK14-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK14-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !25 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !25 -// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK14-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !25 +// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK14-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK14-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK14-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK14-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: -// CHECK14-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK14-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK14-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -7976,7 +8572,7 @@ // CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK14-NEXT: store i64 [[M]], i64* [[M_ADDR]], align 8 // CHECK14-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK14-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -7987,23 +8583,26 @@ // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK14-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]]) +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store i32* [[CONV1]], i32** [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 8 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK14-NEXT: ret void // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -8013,117 +8612,133 @@ // CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK14-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK14-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK14-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK14-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK14-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK14-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK14-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK14: omp.precond.then: // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK14-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK14-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK14-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK14-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !28 -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK14-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK14-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 -// CHECK14-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 -// CHECK14-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !28 -// CHECK14-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK14-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4, !llvm.access.group !28 -// CHECK14-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]]), !llvm.access.group !28 +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK14-NEXT: store i64 [[TMP24]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK14-NEXT: store i64 [[TMP26]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP27]], align 8, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP28]], align 8, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store i32* [[TMP2]], i32** [[TMP29]], align 8, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK14-NEXT: store i64 [[TMP4]], i64* [[TMP30]], align 8, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK14-NEXT: store i32* [[TMP6]], i32** [[TMP31]], align 8, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK14-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: store i32 [[TMP33]], i32* [[TMP32]], align 8, !llvm.access.group !28 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !28 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK14-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 -// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK14-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 -// CHECK14-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 -// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 -// CHECK14-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 -// CHECK14-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 -// CHECK14-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] -// CHECK14-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 -// CHECK14-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 -// CHECK14-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !28 -// CHECK14-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] -// CHECK14-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] -// CHECK14: cond.true12: -// CHECK14-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !28 -// CHECK14-NEXT: br label [[COND_END14:%.*]] -// CHECK14: cond.false13: -// CHECK14-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 -// CHECK14-NEXT: br label [[COND_END14]] -// CHECK14: cond.end14: -// CHECK14-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE12]] ], [ [[TMP32]], [[COND_FALSE13]] ] -// CHECK14-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 -// CHECK14-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 -// CHECK14-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] +// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] +// CHECK14-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] +// CHECK14: cond.true11: +// CHECK14-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: br label [[COND_END13:%.*]] +// CHECK14: cond.false12: +// CHECK14-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: br label [[COND_END13]] +// CHECK14: cond.end13: +// CHECK14-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE11]] ], [ [[TMP43]], [[COND_FALSE12]] ] +// CHECK14-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: store i32 [[TMP44]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP35]]) -// CHECK14-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 -// CHECK14-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP46]]) +// CHECK14-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0 +// CHECK14-NEXT: br i1 [[TMP48]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: -// CHECK14-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[SUB16:%.*]] = sub nsw i32 [[TMP38]], 0 -// CHECK14-NEXT: [[DIV17:%.*]] = sdiv i32 [[SUB16]], 1 -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV17]], 1 -// CHECK14-NEXT: [[ADD18:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD18]], i32* [[I4]], align 4 +// CHECK14-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP49]], 0 +// CHECK14-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 +// CHECK14-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] +// CHECK14-NEXT: store i32 [[ADD17]], i32* [[I4]], align 4 // CHECK14-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK14: .omp.final.done: // CHECK14-NEXT: br label [[OMP_PRECOND_END]] @@ -8132,16 +8747,12 @@ // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -8151,99 +8762,104 @@ // CHECK14-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK14-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK14-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK14-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK14-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK14-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 8 +// CHECK14-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK14-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK14-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK14-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK14: omp.precond.then: // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK14-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK14-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK14-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK14-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 +// CHECK14-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK14-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] -// CHECK14-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK14-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] +// CHECK14-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 -// CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK14-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK14-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 +// CHECK14-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK14-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK14-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !31 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !31 -// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !31 +// CHECK14-NEXT: [[TMP29:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !31 +// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !31 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK14-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK14-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK14-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK14-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: -// CHECK14-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK14-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 -// CHECK14-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 -// CHECK14-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK14-NEXT: store i32 [[ADD13]], i32* [[I6]], align 4 +// CHECK14-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP35]], 0 +// CHECK14-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 +// CHECK14-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 +// CHECK14-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK14-NEXT: store i32 [[ADD12]], i32* [[I5]], align 4 // CHECK14-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK14: .omp.final.done: // CHECK14-NEXT: br label [[OMP_PRECOND_END]] @@ -8257,24 +8873,29 @@ // CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK14-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK14-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK14-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store i32* [[CONV]], i32** [[TMP2]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store i64 [[TMP0]], i64* [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 8 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK14-NEXT: ret void // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8284,81 +8905,98 @@ // CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK14-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK14-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK14-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK14-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK14-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK14-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK14-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK14: omp.precond.then: // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK14-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 -// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 -// CHECK14-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 -// CHECK14-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !34 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 +// CHECK14-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK14-NEXT: store i64 [[TMP21]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !34 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 +// CHECK14-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK14-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !34 +// CHECK14-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP24]], align 8, !llvm.access.group !34 +// CHECK14-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP25]], align 8, !llvm.access.group !34 +// CHECK14-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 8, !llvm.access.group !34 +// CHECK14-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK14-NEXT: store i64 [[TMP4]], i64* [[TMP27]], align 8, !llvm.access.group !34 +// CHECK14-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK14-NEXT: store i32* [[TMP6]], i32** [[TMP28]], align 8, !llvm.access.group !34 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !34 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK14-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK14-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK14-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK14-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK14-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK14-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: -// CHECK14-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK14-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK14-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -8371,15 +9009,11 @@ // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8392,73 +9026,77 @@ // CHECK14-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK14-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK14-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK14-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK14-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK14-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK14-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK14-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK14-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK14: omp.precond.then: // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK14-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK14-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK14-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK14-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK14-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP20]], i32 1073741859, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK14-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK14: omp.dispatch.cond: -// CHECK14-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK14-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK14-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP22]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK14-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK14-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK14: omp.dispatch.body: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 -// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK14-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK14-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK14-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK14-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !37 -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !37 -// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK14-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !37 +// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !37 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK14-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK14-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK14-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK14-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK14: omp.inner.for.end: @@ -8466,12 +9104,12 @@ // CHECK14: omp.dispatch.inc: // CHECK14-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK14: omp.dispatch.end: -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK14-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK14-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: -// CHECK14-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK14-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK14-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK14-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK14-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -8491,7 +9129,7 @@ // CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK14-NEXT: store i64 [[M]], i64* [[M_ADDR]], align 8 // CHECK14-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK14-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -8502,23 +9140,26 @@ // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK14-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]]) +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store i32* [[CONV1]], i32** [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 8 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK14-NEXT: ret void // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -8528,92 +9169,108 @@ // CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK14-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK14-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK14-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK14-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK14-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK14-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK14-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK14: omp.precond.then: // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK14-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK14-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK14-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK14-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 -// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 +// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !40 -// CHECK14-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 -// CHECK14-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !40 -// CHECK14-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK14-NEXT: store i32 [[TMP20]], i32* [[CONV7]], align 4, !llvm.access.group !40 -// CHECK14-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP21]]), !llvm.access.group !40 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !40 +// CHECK14-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK14-NEXT: store i64 [[TMP23]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !40 +// CHECK14-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 +// CHECK14-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK14-NEXT: store i64 [[TMP25]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !40 +// CHECK14-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP26]], align 8, !llvm.access.group !40 +// CHECK14-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP27]], align 8, !llvm.access.group !40 +// CHECK14-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store i32* [[TMP2]], i32** [[TMP28]], align 8, !llvm.access.group !40 +// CHECK14-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK14-NEXT: store i64 [[TMP4]], i64* [[TMP29]], align 8, !llvm.access.group !40 +// CHECK14-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK14-NEXT: store i32* [[TMP6]], i32** [[TMP30]], align 8, !llvm.access.group !40 +// CHECK14-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK14-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !40 +// CHECK14-NEXT: store i32 [[TMP32]], i32* [[TMP31]], align 8, !llvm.access.group !40 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !40 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !40 -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK14-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK14-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !40 +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK14-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK14-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP36]]) +// CHECK14-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK14-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: -// CHECK14-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP28]], 0 -// CHECK14-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV9]], 1 -// CHECK14-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD10]], i32* [[I4]], align 4 +// CHECK14-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP39]], 0 +// CHECK14-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 +// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] +// CHECK14-NEXT: store i32 [[ADD9]], i32* [[I4]], align 4 // CHECK14-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK14: .omp.final.done: // CHECK14-NEXT: br label [[OMP_PRECOND_END]] @@ -8622,16 +9279,12 @@ // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 +// CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -8641,96 +9294,101 @@ // CHECK14-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK14-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK14-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK14-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK14-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK14-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 5 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 8 +// CHECK14-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK14-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK14-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK14-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK14: omp.precond.then: // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK14-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK14-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK14-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK14-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 +// CHECK14-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK14-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], i32 1073741859, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 [[TMP9]]) +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK14-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP23]], i32 1073741859, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK14-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK14: omp.dispatch.cond: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK14-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK14-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK14-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK14-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP25]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK14-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK14-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK14: omp.dispatch.body: -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 -// CHECK14-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK14-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK14-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK14-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 +// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] +// CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK14-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !43 -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !43 -// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !43 +// CHECK14-NEXT: [[TMP31:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !43 +// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[IDXPROM]] // CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !43 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK14-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK14: omp.dispatch.inc: // CHECK14-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK14: omp.dispatch.end: -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK14-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK14-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: -// CHECK14-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK14-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 -// CHECK14-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 -// CHECK14-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] -// CHECK14-NEXT: store i32 [[ADD12]], i32* [[I6]], align 4 +// CHECK14-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP35]], 0 +// CHECK14-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 +// CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 +// CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] +// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I5]], align 4 // CHECK14-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK14: .omp.final.done: // CHECK14-NEXT: br label [[OMP_PRECOND_END]] @@ -8889,75 +9547,91 @@ // CHECK14-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK14-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.12*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK14-NEXT: ret void // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 8 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK14-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK14-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 -// CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 +// CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK14-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 -// CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 // CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !46 +// CHECK14-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !46 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 +// CHECK14-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK14-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !46 +// CHECK14-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !46 +// CHECK14-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !46 +// CHECK14-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 8, !llvm.access.group !46 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.13*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !46 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK14-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK14-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: // CHECK14-NEXT: store i32 10, i32* [[I]], align 4 // CHECK14-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -8966,13 +9640,11 @@ // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 8 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8982,66 +9654,70 @@ // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK14-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK14-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK14-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK14-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK14-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 -// CHECK14-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 +// CHECK14-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK14-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49 -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !49 -// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !49 +// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !49 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 -// CHECK14-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK14-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK14-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK14-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK14-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: // CHECK14-NEXT: store i32 10, i32* [[I]], align 4 // CHECK14-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9053,75 +9729,91 @@ // CHECK14-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK14-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK14-NEXT: ret void // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 8 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK14-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK14-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 -// CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 +// CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK14-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !52 -// CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !52 // CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !52 +// CHECK14-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !52 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 +// CHECK14-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK14-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !52 +// CHECK14-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !52 +// CHECK14-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !52 +// CHECK14-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 8, !llvm.access.group !52 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.15*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !52 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !52 -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !52 +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK14-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK14-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: // CHECK14-NEXT: store i32 10, i32* [[I]], align 4 // CHECK14-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9130,13 +9822,11 @@ // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.15* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.15*, align 8 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9146,66 +9836,70 @@ // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK14-NEXT: store %struct.anon.15* [[__CONTEXT]], %struct.anon.15** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load %struct.anon.15*, %struct.anon.15** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], %struct.anon.15* [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK14-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK14-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK14-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK14-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !55 -// CHECK14-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !55 +// CHECK14-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK14-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !55 -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !55 -// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !55 +// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !55 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 -// CHECK14-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK14-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK14-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK14-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK14-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: // CHECK14-NEXT: store i32 10, i32* [[I]], align 4 // CHECK14-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9219,92 +9913,105 @@ // CHECK14-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 8 // CHECK14-NEXT: store i64 [[M]], i64* [[M_ADDR]], align 8 // CHECK14-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK14-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]]) +// CHECK14-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP2]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 8 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.16*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK14-NEXT: ret void // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.16* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.16*, align 8 +// CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK14-NEXT: store %struct.anon.16* [[__CONTEXT]], %struct.anon.16** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], %struct.anon.16* [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK14-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK14-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK14-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 -// CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 +// CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK14-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 -// CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 -// CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !58 -// CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK14-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58 -// CHECK14-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !58 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 +// CHECK14-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK14-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !58 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 +// CHECK14-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK14-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !58 +// CHECK14-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP16]], align 8, !llvm.access.group !58 +// CHECK14-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP17]], align 8, !llvm.access.group !58 +// CHECK14-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 8, !llvm.access.group !58 +// CHECK14-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !58 +// CHECK14-NEXT: store i32 [[TMP20]], i32* [[TMP19]], align 8, !llvm.access.group !58 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.18*)* @.omp_outlined..27 to void (i32*, i32*, ...)*), %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !58 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK14-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK14-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: // CHECK14-NEXT: store i32 10, i32* [[I]], align 4 // CHECK14-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9313,14 +10020,12 @@ // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.18* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.18*, align 8 +// CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9330,90 +10035,95 @@ // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK14-NEXT: store %struct.anon.18* [[__CONTEXT]], %struct.anon.18** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load %struct.anon.18*, %struct.anon.18** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], %struct.anon.18* [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK14-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK14-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK14-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK14-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK14-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK14-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK14: omp.dispatch.cond: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[CONV2]] // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: -// CHECK14-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK14-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK14-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK14: omp.dispatch.body: // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 -// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !61 -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !61 -// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK14-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !61 +// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !61 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK14-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK14-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK14-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK14: omp.dispatch.inc: -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] +// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK14: omp.dispatch.end: -// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK14-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP13]]) +// CHECK14-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK14-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: // CHECK14-NEXT: store i32 10, i32* [[I]], align 4 // CHECK14-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9425,75 +10135,91 @@ // CHECK14-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 8 // CHECK14-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.20*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK14-NEXT: ret void // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.20* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.20*, align 8 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 8 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK14-NEXT: store %struct.anon.20* [[__CONTEXT]], %struct.anon.20** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load %struct.anon.20*, %struct.anon.20** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], %struct.anon.20* [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK14-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64 -// CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64 +// CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK14-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !64 -// CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !64 // CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !64 +// CHECK14-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !64 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64 +// CHECK14-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK14-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !64 +// CHECK14-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP14]], align 8, !llvm.access.group !64 +// CHECK14-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP15]], align 8, !llvm.access.group !64 +// CHECK14-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 8, !llvm.access.group !64 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.21*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !64 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !64 -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !64 +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP65:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK14-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK14-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: // CHECK14-NEXT: store i32 10, i32* [[I]], align 4 // CHECK14-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9502,13 +10228,11 @@ // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.21* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.21*, align 8 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9518,54 +10242,58 @@ // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK14-NEXT: store %struct.anon.21* [[__CONTEXT]], %struct.anon.21** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load %struct.anon.21*, %struct.anon.21** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], %struct.anon.21* [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK14-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK14-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK14-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK14-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK14-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK14-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK14-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK14: omp.dispatch.cond: -// CHECK14-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK14-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK14-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK14-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK14-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK14: omp.dispatch.body: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !67 -// CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !67 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !67 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !67 -// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !67 +// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !67 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 -// CHECK14-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 +// CHECK14-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK14-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP68:![0-9]+]] // CHECK14: omp.inner.for.end: @@ -9573,9 +10301,9 @@ // CHECK14: omp.dispatch.inc: // CHECK14-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK14: omp.dispatch.end: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK14-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK14-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: // CHECK14-NEXT: store i32 10, i32* [[I]], align 4 // CHECK14-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9589,92 +10317,105 @@ // CHECK14-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 8 // CHECK14-NEXT: store i64 [[M]], i64* [[M_ADDR]], align 8 // CHECK14-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK14-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]]) +// CHECK14-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP2]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 8 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.22*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK14-NEXT: ret void // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.22* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.22*, align 8 +// CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 8 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK14-NEXT: store %struct.anon.22* [[__CONTEXT]], %struct.anon.22** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load %struct.anon.22*, %struct.anon.22** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], %struct.anon.22* [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK14-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK14-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK14-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 -// CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 +// CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK14-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !70 -// CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 -// CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !70 -// CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK14-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70 -// CHECK14-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !70 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !70 +// CHECK14-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK14-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group !70 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 +// CHECK14-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK14-NEXT: store i64 [[TMP15]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group !70 +// CHECK14-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP16]], align 8, !llvm.access.group !70 +// CHECK14-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP17]], align 8, !llvm.access.group !70 +// CHECK14-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP18]], align 8, !llvm.access.group !70 +// CHECK14-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !70 +// CHECK14-NEXT: store i32 [[TMP20]], i32* [[TMP19]], align 8, !llvm.access.group !70 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.24*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !70 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !70 -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !70 +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP71:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK14-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK14-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: // CHECK14-NEXT: store i32 10, i32* [[I]], align 4 // CHECK14-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9683,14 +10424,12 @@ // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.24* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.24*, align 8 +// CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9700,67 +10439,72 @@ // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* +// CHECK14-NEXT: store %struct.anon.24* [[__CONTEXT]], %struct.anon.24** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load %struct.anon.24*, %struct.anon.24** [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_24:%.*]], %struct.anon.24* [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK14-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK14-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK14-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK14-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK14-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK14-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 1073741859, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK14-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32 1073741859, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK14-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK14: omp.dispatch.cond: -// CHECK14-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK14-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK14-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK14-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK14-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK14: omp.dispatch.body: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !73 -// CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !73 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !73 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !73 -// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !73 +// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !73 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 -// CHECK14-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK14-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 +// CHECK14-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK14-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP74:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK14: omp.dispatch.inc: // CHECK14-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK14: omp.dispatch.end: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK14-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK14-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: // CHECK14-NEXT: store i32 10, i32* [[I]], align 4 // CHECK14-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10144,23 +10888,28 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK17-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK17-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i32* [[N_ADDR]], i32** [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i32 [[TMP0]], i32* [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 4 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10170,79 +10919,96 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !14 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP22]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP23]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i32* [[TMP2]], i32** [[TMP24]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i32 [[TMP4]], i32* [[TMP25]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i32* [[TMP6]], i32** [[TMP26]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !14 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK17-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK17-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK17-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -10255,15 +11021,11 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10276,87 +11038,91 @@ // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK17-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP27]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK17-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK17-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK17-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK17-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK17-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -10374,23 +11140,28 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK17-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK17-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i32* [[N_ADDR]], i32** [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i32 [[TMP0]], i32* [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 4 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10400,79 +11171,96 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !23 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP22]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP23]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i32* [[TMP2]], i32** [[TMP24]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i32 [[TMP4]], i32* [[TMP25]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i32* [[TMP6]], i32** [[TMP26]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !23 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK17-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK17-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK17-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -10485,15 +11273,11 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10506,87 +11290,91 @@ // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK17-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !26 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !26 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !26 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP27]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK17-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK17-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK17-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK17-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK17-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -10606,7 +11394,7 @@ // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK17-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 // CHECK17-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK17-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -10615,22 +11403,26 @@ // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[M_ADDR]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]]) +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i32* [[N_ADDR]], i32** [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -10640,109 +11432,127 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !29 -// CHECK17-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]]), !llvm.access.group !29 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: store i32 [[TMP24]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP25]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP26]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i32* [[TMP2]], i32** [[TMP27]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i32 [[TMP4]], i32* [[TMP28]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i32* [[TMP6]], i32** [[TMP29]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: store i32 [[TMP31]], i32* [[TMP30]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !29 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK17-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 -// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 -// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK17-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 -// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 -// CHECK17-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK17-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] // CHECK17-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK17: cond.true11: -// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 // CHECK17-NEXT: br label [[COND_END13:%.*]] // CHECK17: cond.false12: -// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK17-NEXT: br label [[COND_END13]] // CHECK17: cond.end13: -// CHECK17-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE11]] ], [ [[TMP30]], [[COND_FALSE12]] ] +// CHECK17-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE11]] ], [ [[TMP41]], [[COND_FALSE12]] ] // CHECK17-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 -// CHECK17-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 -// CHECK17-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: store i32 [[TMP42]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) -// CHECK17-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK17-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP44]]) +// CHECK17-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK17-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK17-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP47]], 0 // CHECK17-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK17-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] @@ -10755,16 +11565,12 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -10777,88 +11583,94 @@ // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK17-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !32 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !32 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !32 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP29]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK17-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK17-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK17-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK17-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK17-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -10876,23 +11688,28 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK17-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK17-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i32* [[N_ADDR]], i32** [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i32 [[TMP0]], i32* [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 4 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10902,79 +11719,96 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !35 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 +// CHECK17-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !35 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 +// CHECK17-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !35 +// CHECK17-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP22]], align 4, !llvm.access.group !35 +// CHECK17-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP23]], align 4, !llvm.access.group !35 +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i32* [[TMP2]], i32** [[TMP24]], align 4, !llvm.access.group !35 +// CHECK17-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i32 [[TMP4]], i32* [[TMP25]], align 4, !llvm.access.group !35 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i32* [[TMP6]], i32** [[TMP26]], align 4, !llvm.access.group !35 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !35 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK17-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK17-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK17-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -10987,15 +11821,11 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11008,70 +11838,74 @@ // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK17-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP20]], i32 35, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP22]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !38 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !38 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP20]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !38 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP28]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !38 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK17-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -11079,12 +11913,12 @@ // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK17-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK17-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK17-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK17-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL8]] @@ -11104,7 +11938,7 @@ // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK17-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 // CHECK17-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK17-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -11113,22 +11947,26 @@ // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[M_ADDR]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]]) +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i32* [[N_ADDR]], i32** [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -11138,84 +11976,102 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !41 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !41 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !41 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !41 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !41 -// CHECK17-NEXT: store i32 [[TMP18]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP19]]), !llvm.access.group !41 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !41 +// CHECK17-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !41 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !41 +// CHECK17-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !41 +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP24]], align 4, !llvm.access.group !41 +// CHECK17-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP25]], align 4, !llvm.access.group !41 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 4, !llvm.access.group !41 +// CHECK17-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i32 [[TMP4]], i32* [[TMP27]], align 4, !llvm.access.group !41 +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i32* [[TMP6]], i32** [[TMP28]], align 4, !llvm.access.group !41 +// CHECK17-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !41 +// CHECK17-NEXT: store i32 [[TMP30]], i32* [[TMP29]], align 4, !llvm.access.group !41 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !41 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !41 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !41 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK17-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK17-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK17-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP37]], 0 // CHECK17-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -11228,16 +12084,12 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -11250,72 +12102,78 @@ // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK17-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK17-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 [[TMP9]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP23]], i32 35, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP25]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !44 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !44 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !44 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !44 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP21]] +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !44 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP31]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK17-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -11323,12 +12181,12 @@ // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK17-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK17-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK17-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK17-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -11489,73 +12347,89 @@ // CHECK17-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK17-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !47 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47 +// CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !47 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 +// CHECK17-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !47 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4, !llvm.access.group !47 +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4, !llvm.access.group !47 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP14]], align 4, !llvm.access.group !47 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !47 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !47 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !47 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK17-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK17-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11564,13 +12438,11 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11580,63 +12452,67 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK17-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !50 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !50 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !50 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !50 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !50 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP17]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !50 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK17-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK17-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK17-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11648,73 +12524,89 @@ // CHECK17-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.11*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK17-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], %struct.anon.11* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !53 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !53 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !53 +// CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !53 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53 +// CHECK17-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !53 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4, !llvm.access.group !53 +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4, !llvm.access.group !53 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP14]], align 4, !llvm.access.group !53 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.12*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !53 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !53 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !53 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK17-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK17-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11723,13 +12615,11 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11739,63 +12629,67 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK17-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !56 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !56 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !56 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !56 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !56 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP17]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !56 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK17-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK17-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK17-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11809,86 +12703,102 @@ // CHECK17-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 4 // CHECK17-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 // CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[M_ADDR]], align 4 // CHECK17-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.13*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK17-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !59 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !59 -// CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !59 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !59 +// CHECK17-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !59 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 +// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !59 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP14]], align 4, !llvm.access.group !59 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP15]], align 4, !llvm.access.group !59 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 4, !llvm.access.group !59 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !59 +// CHECK17-NEXT: store i32 [[TMP18]], i32* [[TMP17]], align 4, !llvm.access.group !59 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..27 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !59 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !59 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !59 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK17-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK17-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11897,14 +12807,12 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11914,84 +12822,90 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK17-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !62 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !62 +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !62 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !62 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP16]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !62 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP24]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !62 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK17-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK17-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK17-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK17-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP13]]) +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK17-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12003,73 +12917,89 @@ // CHECK17-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.15*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.15* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.15*, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK17-NEXT: store %struct.anon.15* [[__CONTEXT]], %struct.anon.15** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.15*, %struct.anon.15** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], %struct.anon.15* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !65 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !65 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !65 +// CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !65 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65 +// CHECK17-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !65 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4, !llvm.access.group !65 +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4, !llvm.access.group !65 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP14]], align 4, !llvm.access.group !65 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.16*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !65 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !65 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !65 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP66:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK17-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK17-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12078,13 +13008,11 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.16* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.16*, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12094,51 +13022,55 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK17-NEXT: store %struct.anon.16* [[__CONTEXT]], %struct.anon.16** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], %struct.anon.16* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK17-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !68 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !68 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !68 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !68 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP12]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !68 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP18]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !68 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 -// CHECK17-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK17-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK17-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP69:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -12146,9 +13078,9 @@ // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK17-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK17-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12162,86 +13094,102 @@ // CHECK17-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK17-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 // CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[M_ADDR]], align 4 // CHECK17-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.17*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.17* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.17*, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK17-NEXT: store %struct.anon.17* [[__CONTEXT]], %struct.anon.17** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.17*, %struct.anon.17** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], %struct.anon.17* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !71 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !71 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !71 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !71 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !71 -// CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !71 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !71 +// CHECK17-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !71 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !71 +// CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !71 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP14]], align 4, !llvm.access.group !71 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP15]], align 4, !llvm.access.group !71 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 4, !llvm.access.group !71 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !71 +// CHECK17-NEXT: store i32 [[TMP18]], i32* [[TMP17]], align 4, !llvm.access.group !71 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.18*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !71 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !71 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !71 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP72:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK17-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK17-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12250,14 +13198,12 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.18* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.18*, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12267,53 +13213,59 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK17-NEXT: store %struct.anon.18* [[__CONTEXT]], %struct.anon.18** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.18*, %struct.anon.18** [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], %struct.anon.18* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 35, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32 35, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK17-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !74 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !74 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !74 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !74 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP13]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !74 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP21]] // CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !74 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 -// CHECK17-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK17-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK17-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP75:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -12321,9 +13273,9 @@ // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK17-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK17-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12707,23 +13659,28 @@ // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32* [[N_ADDR]], i32** [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], i32* [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12733,79 +13690,96 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !14 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP22]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP23]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32* [[TMP2]], i32** [[TMP24]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[TMP25]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store i32* [[TMP6]], i32** [[TMP26]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !14 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK19-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK19-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK19-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -12818,15 +13792,11 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12839,87 +13809,91 @@ // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !18 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !18 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP27]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK19-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK19-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK19-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK19-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK19-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -12937,23 +13911,28 @@ // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32* [[N_ADDR]], i32** [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], i32* [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12963,79 +13942,96 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !23 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP22]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP23]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32* [[TMP2]], i32** [[TMP24]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[TMP25]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store i32* [[TMP6]], i32** [[TMP26]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !23 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK19-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK19-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK19-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -13048,15 +14044,11 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13069,87 +14061,91 @@ // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !26 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !26 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !26 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP27]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK19-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] -// CHECK19: omp.inner.for.end: -// CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19: omp.inner.for.end: +// CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK19: omp.loop.exit: +// CHECK19-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK19-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK19-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK19-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK19-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -13169,7 +14165,7 @@ // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK19-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 // CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -13178,22 +14174,26 @@ // CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[M_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32* [[N_ADDR]], i32** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -13203,109 +14203,127 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]]), !llvm.access.group !29 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: store i32 [[TMP24]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP25]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP26]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32* [[TMP2]], i32** [[TMP27]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[TMP28]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store i32* [[TMP6]], i32** [[TMP29]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: store i32 [[TMP31]], i32* [[TMP30]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !29 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK19-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK19-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK19-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK19-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK19-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] // CHECK19-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK19: cond.true11: -// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 // CHECK19-NEXT: br label [[COND_END13:%.*]] // CHECK19: cond.false12: -// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK19-NEXT: br label [[COND_END13]] // CHECK19: cond.end13: -// CHECK19-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE11]] ], [ [[TMP30]], [[COND_FALSE12]] ] +// CHECK19-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE11]] ], [ [[TMP41]], [[COND_FALSE12]] ] // CHECK19-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 -// CHECK19-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: store i32 [[TMP42]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) -// CHECK19-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK19-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP44]]) +// CHECK19-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK19-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK19-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP47]], 0 // CHECK19-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK19-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] @@ -13318,16 +14336,12 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -13340,88 +14354,94 @@ // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !32 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !32 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !32 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP29]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK19-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK19-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK19-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK19-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK19-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -13439,23 +14459,28 @@ // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32* [[N_ADDR]], i32** [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], i32* [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13465,79 +14490,96 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], %struct.anon.5* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], %struct.anon.5* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !35 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP22]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP23]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32* [[TMP2]], i32** [[TMP24]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[TMP25]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store i32* [[TMP6]], i32** [[TMP26]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.6*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !35 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK19-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK19-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK19-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -13550,15 +14592,11 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13571,70 +14609,74 @@ // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP20]], i32 1073741859, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP22]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK19-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !38 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !38 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP20]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !38 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP28]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !38 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK19-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -13642,12 +14684,12 @@ // CHECK19: omp.dispatch.inc: // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK19-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK19-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK19-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK19-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL8]] @@ -13667,7 +14709,7 @@ // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK19-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 // CHECK19-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -13676,22 +14718,26 @@ // CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[M_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32* [[N_ADDR]], i32** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.7*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -13701,84 +14747,102 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], %struct.anon.7* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], %struct.anon.7* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !41 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !41 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !41 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !41 -// CHECK19-NEXT: store i32 [[TMP18]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP19]]), !llvm.access.group !41 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: store i32 [[TMP22]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP24]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP25]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32* [[TMP2]], i32** [[TMP26]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[TMP27]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store i32* [[TMP6]], i32** [[TMP28]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: store i32 [[TMP30]], i32* [[TMP29]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.8*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !41 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !41 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]]) -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK19-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP34]]) +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK19-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK19-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP37]], 0 // CHECK19-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -13791,16 +14855,12 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -13813,72 +14873,78 @@ // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP13]], i32 1073741859, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 [[TMP9]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP23]], i32 1073741859, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP25]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !44 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !44 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !44 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !44 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP21]] +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !44 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP31]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK19-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -13886,12 +14952,12 @@ // CHECK19: omp.dispatch.inc: // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK19-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK19-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK19-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK19-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -14052,73 +15118,89 @@ // CHECK19-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.9*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], %struct.anon.9* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !47 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !47 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 +// CHECK19-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !47 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4, !llvm.access.group !47 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4, !llvm.access.group !47 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP14]], align 4, !llvm.access.group !47 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.10*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !47 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !47 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !47 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK19-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK19-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, i32* [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -14127,13 +15209,11 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -14143,63 +15223,67 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !50 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !50 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !50 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !50 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !50 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP17]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !50 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK19-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK19-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK19-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, i32* [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -14211,73 +15295,89 @@ // CHECK19-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.11*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], %struct.anon.11* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !53 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !53 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !53 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !53 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53 +// CHECK19-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !53 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4, !llvm.access.group !53 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4, !llvm.access.group !53 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP14]], align 4, !llvm.access.group !53 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.12*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !53 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !53 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !53 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK19-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK19-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, i32* [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -14286,13 +15386,11 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -14302,63 +15400,67 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !56 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !56 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !56 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !56 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !56 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP17]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !56 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK19-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK19-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK19-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, i32* [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -14372,86 +15474,102 @@ // CHECK19-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 4 // CHECK19-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 // CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[M_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.13*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !59 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !59 -// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !59 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP14]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP15]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: store i32 [[TMP18]], i32* [[TMP17]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.14*)* @.omp_outlined..27 to void (i32*, i32*, ...)*), %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !59 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !59 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK19-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK19-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, i32* [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -14460,14 +15578,12 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -14477,84 +15593,90 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP13]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !62 -// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !62 +// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !62 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !62 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP16]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !62 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP24]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !62 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 -// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK19-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK19-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK19-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK19-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP13]]) +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK19-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, i32* [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -14566,73 +15688,89 @@ // CHECK19-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.15*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.15* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.15*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.15* [[__CONTEXT]], %struct.anon.15** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.15*, %struct.anon.15** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], %struct.anon.15* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !65 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !65 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !65 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !65 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65 +// CHECK19-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !65 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP12]], align 4, !llvm.access.group !65 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP13]], align 4, !llvm.access.group !65 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP14]], align 4, !llvm.access.group !65 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.16*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !65 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !65 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !65 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP66:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK19-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK19-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, i32* [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -14641,13 +15779,11 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.16* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.16*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -14657,51 +15793,55 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.16* [[__CONTEXT]], %struct.anon.16** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], %struct.anon.16* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK19-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !68 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !68 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !68 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !68 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP12]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !68 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP18]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !68 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 -// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK19-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP69:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -14709,9 +15849,9 @@ // CHECK19: omp.dispatch.inc: // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK19-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK19-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, i32* [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -14725,86 +15865,102 @@ // CHECK19-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK19-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 // CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[M_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.17*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.17* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.17*, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.17* [[__CONTEXT]], %struct.anon.17** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.17*, %struct.anon.17** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], %struct.anon.17* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !71 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !71 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !71 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !71 -// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !71 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP14]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP15]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store [10 x i32]* [[TMP2]], [10 x i32]** [[TMP16]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: store i32 [[TMP18]], i32* [[TMP17]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.18*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group !71 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !71 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP72:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK19-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK19-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, i32* [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -14813,14 +15969,12 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.18* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.18*, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -14830,53 +15984,59 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon.18* [[__CONTEXT]], %struct.anon.18** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.18*, %struct.anon.18** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], %struct.anon.18* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 1073741859, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32 1073741859, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK19-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP15]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !74 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !74 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !74 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !74 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP13]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !74 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP6]], i32 0, i32 [[TMP21]] // CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !74 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 -// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK19-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP75:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -14884,9 +16044,9 @@ // CHECK19: omp.dispatch.inc: // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK19-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, i32* [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/teams_distribute_private_codegen.cpp b/clang/test/OpenMP/teams_distribute_private_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_private_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_private_codegen.cpp @@ -256,15 +256,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l93 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -279,6 +281,8 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 @@ -294,70 +298,70 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM2]] -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.S* [[ARRAYIDX3]] to i8* -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[SIVAR]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[ARRAYIDX3]] to i8* +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP14]] // CHECK1-NEXT: store i32 [[ADD4]], i32* [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i64 2 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP19]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -437,15 +441,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK1-SAME: () #[[ATTR4]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 @@ -461,6 +467,8 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 @@ -478,67 +486,67 @@ // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP12]] to i64 // CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM4]] -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast %struct.S.0* [[ARRAYIDX5]] to i8* -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[TMP10]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[ARRAYIDX5]] to i8* +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[TMP11]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN7]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] @@ -744,15 +752,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l93 // CHECK3-SAME: () #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -767,6 +777,8 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 @@ -782,68 +794,68 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP10]] -// CHECK3-NEXT: [[TMP11:%.*]] = bitcast %struct.S* [[ARRAYIDX2]] to i8* -// CHECK3-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 4, i1 false) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[SIVAR]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP10]] +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[ARRAYIDX2]] to i8* +// CHECK3-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], [[TMP14]] // CHECK3-NEXT: store i32 [[ADD3]], i32* [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN5]], i32 2 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN5]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP19]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] @@ -923,15 +935,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK3-SAME: () #[[ATTR4]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 @@ -947,6 +961,8 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 @@ -964,65 +980,65 @@ // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK3-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: [[TMP12:%.*]] = bitcast %struct.S.0* [[ARRAYIDX4]] to i8* -// CHECK3-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[TMP10]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP10]] +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[ARRAYIDX4]] to i8* +// CHECK3-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[TMP11]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 4, i1 false) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) // CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN6]], i32 2 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -1222,18 +1238,20 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK9-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 @@ -1249,63 +1267,65 @@ // CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32* undef, i32** [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK9-NEXT: store i32* [[G1]], i32** [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK9-NEXT: store i32 1, i32* [[G]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[_TMP2]], align 8 -// CHECK9-NEXT: store volatile i32 1, i32* [[TMP8]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK9-NEXT: store volatile i32 1, i32* [[TMP9]], align 4 // CHECK9-NEXT: store i32 2, i32* [[SIVAR]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store i32* [[G]], i32** [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[_TMP2]], align 8 -// CHECK9-NEXT: store i32* [[TMP11]], i32** [[TMP10]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[G]], i32** [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[_TMP2]], align 8 +// CHECK9-NEXT: store i32* [[TMP12]], i32** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[TMP13]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_reduction_codegen.cpp b/clang/test/OpenMP/teams_distribute_reduction_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_reduction_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_reduction_codegen.cpp @@ -122,19 +122,22 @@ // CHECK1-SAME: (i64 noundef [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -145,76 +148,78 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[SIVAR1]], align 4 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[SIVAR1]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast i32* [[SIVAR1]] to i8* -// CHECK1-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP14]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -286,19 +291,22 @@ // CHECK1-SAME: (i64 noundef [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -309,76 +317,78 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[T_VAR1]], align 4 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[T_VAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast i32* [[T_VAR1]] to i8* -// CHECK1-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP14]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i32* [[T_VAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -454,18 +464,21 @@ // CHECK3-SAME: (i32 noundef [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[SIVAR]], i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[SIVAR_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[SIVAR_ADDR]], i32** [[TMP0]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -476,76 +489,78 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[SIVAR1]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[SIVAR1]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = bitcast i32* [[SIVAR1]] to i8* -// CHECK3-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, i8* [[TMP14]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP15:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK3-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i32 4, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -616,18 +631,21 @@ // CHECK3-SAME: (i32 noundef [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[T_VAR_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[T_VAR_ADDR]], i32** [[TMP0]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -638,76 +656,78 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[T_VAR1]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[T_VAR1]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[T_VAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = bitcast i32* [[T_VAR1]] to i8* -// CHECK3-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 4, i8* [[TMP14]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP15:%.*]] = bitcast i32* [[T_VAR]] to i8* +// CHECK3-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 4, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -758,19 +778,22 @@ // CHECK9-SAME: (i64 noundef [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -782,79 +805,81 @@ // CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[SIVAR1]], align 4 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[SIVAR]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9: omp.inner.for.body: +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[SIVAR1]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store i32* [[SIVAR1]], i32** [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK9-NEXT: store i32 [[ADD2]], i32* [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[TMP13]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK9-NEXT: [[TMP14:%.*]] = bitcast i32* [[SIVAR1]] to i8* -// CHECK9-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP15]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP16:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK9-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK9-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP17]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK9-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK9-NEXT: store i32 [[ADD4]], i32* [[TMP2]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.case2: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP19]] monotonic, align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP21]] monotonic, align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.default: // CHECK9-NEXT: ret void diff --git a/clang/test/OpenMP/teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_codegen.cpp @@ -369,6 +369,7 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 8 // CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) // CHECK1-NEXT: store i64 [[TE]], i64* [[TE_ADDR]], align 8 // CHECK1-NEXT: store i64 [[TH]], i64* [[TH_ADDR]], align 8 @@ -383,112 +384,118 @@ // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, [100 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV2]], i32* [[CONV3]], [100 x i32]* [[TMP1]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV2]], i32** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[CONV3]], i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [100 x i32]* [[TMP1]], [100 x i32]** [[TMP6]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], [100 x i32]* noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store [100 x i32]* [[A]], [100 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[I3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK1-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: -// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP6]], i64 0, i64 0 // CHECK1-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[ARRAYDECAY]], i64 16) ] // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP2]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP23]], 0 -// CHECK1-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 -// CHECK1-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK1-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 +// CHECK1-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] +// CHECK1-NEXT: store i32 [[ADD10]], i32* [[TMP2]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -501,21 +508,25 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: store [100 x i32]* [[A]], [100 x i32]** [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [100 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[CONV]], [100 x i32]* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store [100 x i32]* [[TMP0]], [100 x i32]** [[TMP2]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], [100 x i32]* noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -528,80 +539,82 @@ // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: store [100 x i32]* [[A]], [100 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP1]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP4]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP22]], 0 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 // CHECK1-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK1-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -773,6 +786,7 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 4 // CHECK3-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) // CHECK3-NEXT: store i32 [[TE]], i32* [[TE_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TH]], i32* [[TH_ADDR]], align 4 @@ -783,111 +797,117 @@ // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TE_ADDR]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TH_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, [100 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[I_ADDR]], i32* [[N_ADDR]], [100 x i32]* [[TMP1]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[I_ADDR]], i32** [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[N_ADDR]], i32** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [100 x i32]* [[TMP1]], [100 x i32]** [[TMP6]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[I:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], [100 x i32]* noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store [100 x i32]* [[A]], [100 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[I3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK3-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: -// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP6]], i32 0, i32 0 // CHECK3-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[ARRAYDECAY]], i32 16) ] // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP2]], i32 0, i32 [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP6]], i32 0, i32 [[TMP21]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP23]], 0 -// CHECK3-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 -// CHECK3-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 -// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK3-NEXT: store i32 [[ADD11]], i32* [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK3-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 +// CHECK3-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 +// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] +// CHECK3-NEXT: store i32 [[ADD10]], i32* [[TMP2]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: br label [[OMP_PRECOND_END]] @@ -900,20 +920,24 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store [100 x i32]* [[A]], [100 x i32]** [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [100 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], [100 x i32]* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[N_ADDR]], i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store [100 x i32]* [[TMP0]], [100 x i32]** [[TMP2]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], [100 x i32]* noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [100 x i32]*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -926,79 +950,81 @@ // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: store [100 x i32]* [[A]], [100 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load [100 x i32]*, [100 x i32]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP1]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP4]], i32 0, i32 [[TMP19]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP19]]) -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP22]], 0 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 // CHECK3-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK3-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -1373,24 +1399,29 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1403,82 +1434,84 @@ // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK9-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -1584,23 +1617,28 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1613,81 +1651,83 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !5 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !5 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP17]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !5 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 [[TMP21]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -1935,18 +1975,21 @@ // CHECK17-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1956,62 +1999,64 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK17-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4 -// CHECK17-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP9:%.*]] = load float, float* [[B]], align 4, !llvm.access.group !4 -// CHECK17-NEXT: [[CONV:%.*]] = fptosi float [[TMP9]] to i32 -// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK17-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP11:%.*]] = load float, float* [[B]], align 4, !llvm.access.group !4 +// CHECK17-NEXT: [[CONV:%.*]] = fptosi float [[TMP11]] to i32 +// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 [[CONV]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK17-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK17-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK17-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 123, i32* [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2103,18 +2148,21 @@ // CHECK19-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2124,61 +2172,63 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK19-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5 -// CHECK19-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 1 -// CHECK19-NEXT: [[TMP9:%.*]] = load float, float* [[B]], align 4, !llvm.access.group !5 -// CHECK19-NEXT: [[CONV:%.*]] = fptosi float [[TMP9]] to i32 -// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP10]] +// CHECK19-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP11:%.*]] = load float, float* [[B]], align 4, !llvm.access.group !5 +// CHECK19-NEXT: [[CONV:%.*]] = fptosi float [[TMP11]] to i32 +// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] // CHECK19-NEXT: store i32 [[CONV]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK19-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK19-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK19-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 123, i32* [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2271,7 +2321,7 @@ // CHECK21-NEXT: entry: // CHECK21-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 // CHECK21-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK21-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK21-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK21-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 // CHECK21-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 1 @@ -2279,23 +2329,24 @@ // CHECK21-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP1]], 0.000000e+00 // CHECK21-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK21-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK21-NEXT: [[TMP2:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK21-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK21-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP2]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK21-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK21-NEXT: store i8 [[FROMBOOL2]], i8* [[CONV]], align 1 -// CHECK21-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]], i64 [[TMP3]]) +// CHECK21-NEXT: store i8 [[FROMBOOL2]], i8* [[TMP3]], align 8 +// CHECK21-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK21-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 -// CHECK21-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK21-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2305,59 +2356,64 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK21-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i8, i8* [[TMP3]], align 8 +// CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 +// CHECK21-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK21-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK21-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 122 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 -// CHECK21-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP10:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP10]] to i1 +// CHECK21-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK21: omp_if.then: // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 -// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 +// CHECK21-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK21-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4 -// CHECK21-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 1 -// CHECK21-NEXT: [[TMP10:%.*]] = load float, float* [[B]], align 4, !nontemporal !5, !llvm.access.group !4 -// CHECK21-NEXT: [[CONV2:%.*]] = fptosi float [[TMP10]] to i32 -// CHECK21-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !4 -// CHECK21-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK21-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP14:%.*]] = load float, float* [[B]], align 4, !nontemporal !5, !llvm.access.group !4 +// CHECK21-NEXT: [[CONV:%.*]] = fptosi float [[TMP14]] to i32 +// CHECK21-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !4 +// CHECK21-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 // CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK21-NEXT: store i32 [[CONV2]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !4 +// CHECK21-NEXT: store i32 [[CONV]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !4 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK21-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK21-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK21: omp.inner.for.end: @@ -2365,29 +2421,29 @@ // CHECK21: omp_if.else: // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND4:%.*]] // CHECK21: omp.inner.for.cond4: -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK21-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK21-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY6:%.*]], label [[OMP_INNER_FOR_END17:%.*]] // CHECK21: omp.inner.for.body6: -// CHECK21-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK21-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK21-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL7]] // CHECK21-NEXT: store i32 [[ADD8]], i32* [[I]], align 4 -// CHECK21-NEXT: [[B9:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP0]], i32 0, i32 1 -// CHECK21-NEXT: [[TMP16:%.*]] = load float, float* [[B9]], align 4 -// CHECK21-NEXT: [[CONV10:%.*]] = fptosi float [[TMP16]] to i32 -// CHECK21-NEXT: [[A11:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK21-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 -// CHECK21-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK21-NEXT: [[B9:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP2]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP20:%.*]] = load float, float* [[B9]], align 4 +// CHECK21-NEXT: [[CONV10:%.*]] = fptosi float [[TMP20]] to i32 +// CHECK21-NEXT: [[A11:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK21-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP21]] to i64 // CHECK21-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A11]], i64 0, i64 [[IDXPROM12]] // CHECK21-NEXT: store i32 [[CONV10]], i32* [[ARRAYIDX13]], align 4 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE14:%.*]] // CHECK21: omp.body.continue14: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC15:%.*]] // CHECK21: omp.inner.for.inc15: -// CHECK21-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK21-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK21-NEXT: store i32 [[ADD16]], i32* [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND4]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK21: omp.inner.for.end17: @@ -2395,10 +2451,10 @@ // CHECK21: omp_if.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK21-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK21-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK21-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK21-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 123, i32* [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2491,7 +2547,7 @@ // CHECK23-NEXT: entry: // CHECK23-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 // CHECK23-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK23-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK23-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK23-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 // CHECK23-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 1 @@ -2499,23 +2555,24 @@ // CHECK23-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP1]], 0.000000e+00 // CHECK23-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK23-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK23-NEXT: [[TMP2:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK23-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK23-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK23-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP2]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK23-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK23-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK23-NEXT: store i8 [[FROMBOOL2]], i8* [[CONV]], align 1 -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*, i32)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]], i32 [[TMP3]]) +// CHECK23-NEXT: store i8 [[FROMBOOL2]], i8* [[TMP3]], align 4 +// CHECK23-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK23-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 -// CHECK23-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK23-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2525,58 +2582,63 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK23-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK23-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i8, i8* [[TMP3]], align 4 +// CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 +// CHECK23-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK23-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK23-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 122 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 -// CHECK23-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK23-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP10]] to i1 +// CHECK23-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK23: omp_if.then: // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 -// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 +// CHECK23-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK23-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK23-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5 -// CHECK23-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 1 -// CHECK23-NEXT: [[TMP10:%.*]] = load float, float* [[B]], align 4, !nontemporal !6, !llvm.access.group !5 -// CHECK23-NEXT: [[CONV2:%.*]] = fptosi float [[TMP10]] to i32 -// CHECK23-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 -// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK23-NEXT: store i32 [[CONV2]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 +// CHECK23-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP14:%.*]] = load float, float* [[B]], align 4, !nontemporal !6, !llvm.access.group !5 +// CHECK23-NEXT: [[CONV:%.*]] = fptosi float [[TMP14]] to i32 +// CHECK23-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 +// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP15]] +// CHECK23-NEXT: store i32 [[CONV]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK23-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK23-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK23-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK23-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK23: omp.inner.for.end: @@ -2584,28 +2646,28 @@ // CHECK23: omp_if.else: // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND4:%.*]] // CHECK23: omp.inner.for.cond4: -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK23-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK23-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY6:%.*]], label [[OMP_INNER_FOR_END16:%.*]] // CHECK23: omp.inner.for.body6: -// CHECK23-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK23-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK23-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL7]] // CHECK23-NEXT: store i32 [[ADD8]], i32* [[I]], align 4 -// CHECK23-NEXT: [[B9:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP0]], i32 0, i32 1 -// CHECK23-NEXT: [[TMP16:%.*]] = load float, float* [[B9]], align 4 -// CHECK23-NEXT: [[CONV10:%.*]] = fptosi float [[TMP16]] to i32 -// CHECK23-NEXT: [[A11:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK23-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 -// CHECK23-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A11]], i32 0, i32 [[TMP17]] +// CHECK23-NEXT: [[B9:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP2]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP20:%.*]] = load float, float* [[B9]], align 4 +// CHECK23-NEXT: [[CONV10:%.*]] = fptosi float [[TMP20]] to i32 +// CHECK23-NEXT: [[A11:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK23-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A11]], i32 0, i32 [[TMP21]] // CHECK23-NEXT: store i32 [[CONV10]], i32* [[ARRAYIDX12]], align 4 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE13:%.*]] // CHECK23: omp.body.continue13: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC14:%.*]] // CHECK23: omp.inner.for.inc14: -// CHECK23-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK23-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK23-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND4]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK23: omp.inner.for.end16: @@ -2613,10 +2675,10 @@ // CHECK23: omp_if.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK23-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK23-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK23-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK23-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 123, i32* [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3040,24 +3102,29 @@ // CHECK33-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK33-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK33-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK33-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK33-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK33-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK33-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK33-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK33-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK33-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK33-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK33-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK33-NEXT: store i32* [[CONV]], i32** [[TMP2]], align 8 +// CHECK33-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK33-NEXT: store i64 [[TMP0]], i64* [[TMP3]], align 8 +// CHECK33-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK33-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 8 +// CHECK33-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK33-NEXT: ret void // // // CHECK33-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK33-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK33-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK33-NEXT: entry: // CHECK33-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK33-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK33-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK33-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK33-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK33-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK33-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK33-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK33-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3070,82 +3137,84 @@ // CHECK33-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK33-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK33-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK33-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK33-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK33-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK33-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK33-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK33-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK33-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK33-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK33-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK33-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK33-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK33-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK33-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK33-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK33-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK33-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK33-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK33-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK33-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK33-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK33-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK33-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK33-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK33-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK33-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK33-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK33-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK33-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK33-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK33-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK33-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK33: omp.precond.then: // CHECK33-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK33-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK33-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK33-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK33-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK33-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK33-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK33-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK33-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK33-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK33-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK33-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK33-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK33-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK33-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK33-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK33-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK33-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK33-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK33-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK33: cond.true: -// CHECK33-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK33-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK33-NEXT: br label [[COND_END:%.*]] // CHECK33: cond.false: -// CHECK33-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK33-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK33-NEXT: br label [[COND_END]] // CHECK33: cond.end: -// CHECK33-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK33-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK33-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK33-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK33-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK33-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK33-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK33-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK33: omp.inner.for.cond: -// CHECK33-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK33-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 -// CHECK33-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK33-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK33-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 +// CHECK33-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK33-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK33: omp.inner.for.body: -// CHECK33-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK33-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK33-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK33-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK33-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK33-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !5 -// CHECK33-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !5 -// CHECK33-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK33-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK33-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !5 +// CHECK33-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK33-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i64 [[IDXPROM]] // CHECK33-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 // CHECK33-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK33: omp.body.continue: // CHECK33-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK33: omp.inner.for.inc: -// CHECK33-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK33-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK33-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK33-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK33-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK33-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK33: omp.inner.for.end: // CHECK33-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK33: omp.loop.exit: -// CHECK33-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK33-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK33-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK33-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK33-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK33-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK33-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK33-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK33-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK33-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK33-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK33-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK33: .omp.final.then: -// CHECK33-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK33-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK33-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK33-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK33-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK33-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK33-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -3225,6 +3294,7 @@ // CHECK33-NEXT: [[TE_ADDR:%.*]] = alloca i64, align 8 // CHECK33-NEXT: [[TH_ADDR:%.*]] = alloca i64, align 8 // CHECK33-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK33-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK33-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) // CHECK33-NEXT: store i64 [[TE]], i64* [[TE_ADDR]], align 8 // CHECK33-NEXT: store i64 [[TH]], i64* [[TH_ADDR]], align 8 @@ -3235,16 +3305,18 @@ // CHECK33-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK33-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK33-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK33-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP1]]) +// CHECK33-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK33-NEXT: store [10 x i32]* [[TMP1]], [10 x i32]** [[TMP4]], align 8 +// CHECK33-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK33-NEXT: ret void // // // CHECK33-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK33-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK33-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK33-NEXT: entry: // CHECK33-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK33-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK33-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK33-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK33-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK33-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK33-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3254,58 +3326,60 @@ // CHECK33-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK33-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK33-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK33-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK33-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK33-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK33-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK33-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK33-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK33-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK33-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK33-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK33-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK33-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK33-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK33-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK33-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK33-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK33-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK33-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK33-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK33-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK33-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK33-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK33: cond.true: // CHECK33-NEXT: br label [[COND_END:%.*]] // CHECK33: cond.false: -// CHECK33-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK33-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK33-NEXT: br label [[COND_END]] // CHECK33: cond.end: -// CHECK33-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK33-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK33-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK33-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK33-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK33-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK33-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK33-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK33: omp.inner.for.cond: -// CHECK33-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK33-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK33-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK33-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK33-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK33-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK33-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK33: omp.inner.for.body: -// CHECK33-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK33-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK33-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK33-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK33-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK33-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK33-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK33-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK33-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK33-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK33-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK33-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK33-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK33-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK33: omp.body.continue: // CHECK33-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK33: omp.inner.for.inc: -// CHECK33-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK33-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK33-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK33-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK33-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK33-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK33: omp.inner.for.end: // CHECK33-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK33: omp.loop.exit: -// CHECK33-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK33-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK33-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK33-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK33-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK33-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK33-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK33-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK33: .omp.final.then: // CHECK33-NEXT: store i32 10, i32* [[I]], align 4 // CHECK33-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3415,23 +3489,28 @@ // CHECK35-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK35-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK35-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK35-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK35-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK35-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK35-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK35-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK35-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK35-NEXT: store i32* [[N_ADDR]], i32** [[TMP2]], align 4 +// CHECK35-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK35-NEXT: store i32 [[TMP0]], i32* [[TMP3]], align 4 +// CHECK35-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK35-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 4 +// CHECK35-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK35-NEXT: ret void // // // CHECK35-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK35-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK35-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK35-NEXT: entry: // CHECK35-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK35-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK35-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK35-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK35-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK35-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK35-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3444,81 +3523,83 @@ // CHECK35-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK35-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK35-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK35-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK35-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK35-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK35-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK35-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK35-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK35-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK35-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK35-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK35-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK35-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK35-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK35-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK35-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK35-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK35-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK35-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK35-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK35-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK35-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK35-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK35-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK35-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK35-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK35-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK35-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK35-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK35-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK35-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK35-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK35-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK35: omp.precond.then: // CHECK35-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK35-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK35-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK35-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK35-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK35-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK35-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK35-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK35-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK35-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK35-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK35-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK35-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK35-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK35-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK35-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK35-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK35-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK35-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK35-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK35: cond.true: -// CHECK35-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK35-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK35-NEXT: br label [[COND_END:%.*]] // CHECK35: cond.false: -// CHECK35-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK35-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK35-NEXT: br label [[COND_END]] // CHECK35: cond.end: -// CHECK35-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK35-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK35-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK35-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK35-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK35-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK35-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK35-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK35: omp.inner.for.cond: -// CHECK35-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK35-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 -// CHECK35-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK35-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK35-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK35-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK35-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK35: omp.inner.for.body: -// CHECK35-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK35-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK35-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK35-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK35-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK35-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !6 -// CHECK35-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !6 -// CHECK35-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP17]] +// CHECK35-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !6 +// CHECK35-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 [[TMP21]] // CHECK35-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 // CHECK35-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK35: omp.body.continue: // CHECK35-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK35: omp.inner.for.inc: -// CHECK35-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK35-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK35-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK35-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK35-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK35-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK35: omp.inner.for.end: // CHECK35-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK35: omp.loop.exit: -// CHECK35-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK35-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK35-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK35-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK35-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK35-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK35-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK35-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK35-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK35-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK35-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK35-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK35: .omp.final.then: -// CHECK35-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK35-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK35-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK35-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK35-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK35-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK35-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -3596,6 +3677,7 @@ // CHECK35-NEXT: [[TE_ADDR:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[TH_ADDR:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK35-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK35-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) // CHECK35-NEXT: store i32 [[TE]], i32* [[TE_ADDR]], align 4 // CHECK35-NEXT: store i32 [[TH]], i32* [[TH_ADDR]], align 4 @@ -3604,16 +3686,18 @@ // CHECK35-NEXT: [[TMP2:%.*]] = load i32, i32* [[TE_ADDR]], align 4 // CHECK35-NEXT: [[TMP3:%.*]] = load i32, i32* [[TH_ADDR]], align 4 // CHECK35-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK35-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP1]]) +// CHECK35-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK35-NEXT: store [10 x i32]* [[TMP1]], [10 x i32]** [[TMP4]], align 4 +// CHECK35-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK35-NEXT: ret void // // // CHECK35-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK35-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK35-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK35-NEXT: entry: // CHECK35-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK35-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK35-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK35-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK35-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3623,57 +3707,59 @@ // CHECK35-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK35-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK35-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK35-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK35-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK35-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK35-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK35-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK35-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK35-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK35-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK35-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK35-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK35-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK35-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK35-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK35-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK35-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK35-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK35-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK35-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK35-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK35-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK35-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK35: cond.true: // CHECK35-NEXT: br label [[COND_END:%.*]] // CHECK35: cond.false: -// CHECK35-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK35-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK35-NEXT: br label [[COND_END]] // CHECK35: cond.end: -// CHECK35-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK35-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK35-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK35-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK35-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK35-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK35-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK35-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK35: omp.inner.for.cond: -// CHECK35-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK35-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK35-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK35-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK35-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK35-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK35-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK35: omp.inner.for.body: -// CHECK35-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK35-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK35-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK35-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK35-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK35-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 -// CHECK35-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 -// CHECK35-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK35-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK35-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i32 0, i32 [[TMP11]] // CHECK35-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 // CHECK35-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK35: omp.body.continue: // CHECK35-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK35: omp.inner.for.inc: -// CHECK35-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK35-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK35-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK35-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK35-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK35-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK35: omp.inner.for.end: // CHECK35-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK35: omp.loop.exit: -// CHECK35-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK35-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK35-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK35-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK35-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK35-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK35-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK35-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK35: .omp.final.then: // CHECK35-NEXT: store i32 10, i32* [[I]], align 4 // CHECK35-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3799,7 +3885,7 @@ // CHECK37-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK37-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK37-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK37-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK37-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK37-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 // CHECK37-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK37-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -3812,25 +3898,28 @@ // CHECK37-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK37-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK37-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK37-NEXT: [[TMP3:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK37-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP3]] to i1 -// CHECK37-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* -// CHECK37-NEXT: [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK37-NEXT: store i8 [[FROMBOOL4]], i8* [[CONV3]], align 1 -// CHECK37-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK37-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]]) +// CHECK37-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK37-NEXT: store i32* [[CONV1]], i32** [[TMP3]], align 8 +// CHECK37-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK37-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK37-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK37-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK37-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK37-NEXT: [[TMP7:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK37-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP7]] to i1 +// CHECK37-NEXT: [[FROMBOOL3:%.*]] = zext i1 [[TOBOOL2]] to i8 +// CHECK37-NEXT: store i8 [[FROMBOOL3]], i8* [[TMP6]], align 8 +// CHECK37-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK37-NEXT: ret void // // // CHECK37-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK37-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK37-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK37-NEXT: entry: // CHECK37-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK37-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK37-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK37-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK37-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK37-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK37-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK37-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK37-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK37-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK37-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -3843,120 +3932,125 @@ // CHECK37-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK37-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK37-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK37-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK37-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK37-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK37-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK37-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK37-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK37-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK37-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK37-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK37-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK37-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK37-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK37-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK37-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK37-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK37-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK37-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK37-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK37-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK37-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK37-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK37-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP7]], align 8 +// CHECK37-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 +// CHECK37-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK37-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK37-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK37-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK37-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK37-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK37-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK37-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK37-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK37-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK37-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK37-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK37-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK37-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK37-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK37: omp.precond.then: // CHECK37-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK37-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK37-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK37-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK37-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_UB]], align 4 // CHECK37-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK37-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK37-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK37-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK37-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK37-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK37-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK37-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK37-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK37-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK37-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK37-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK37-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK37-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK37-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK37: cond.true: -// CHECK37-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK37-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK37-NEXT: br label [[COND_END:%.*]] // CHECK37: cond.false: -// CHECK37-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK37-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK37-NEXT: br label [[COND_END]] // CHECK37: cond.end: -// CHECK37-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK37-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK37-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK37-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK37-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK37-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK37-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP14]] to i1 -// CHECK37-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK37-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK37-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 +// CHECK37-NEXT: [[TMP20:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK37-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP20]] to i1 +// CHECK37-NEXT: br i1 [[TOBOOL6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK37: omp_if.then: // CHECK37-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK37: omp.inner.for.cond: -// CHECK37-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK37-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 -// CHECK37-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK37-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK37-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK37-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 +// CHECK37-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK37-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK37: omp.inner.for.body: -// CHECK37-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK37-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK37-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK37-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK37-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK37-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !5 -// CHECK37-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !5 -// CHECK37-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK37-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK37-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !5 +// CHECK37-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK37-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i64 [[IDXPROM]] // CHECK37-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 // CHECK37-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK37: omp.body.continue: // CHECK37-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK37: omp.inner.for.inc: -// CHECK37-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK37-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK37-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK37-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK37-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK37-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK37-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK37: omp.inner.for.end: // CHECK37-NEXT: br label [[OMP_IF_END:%.*]] // CHECK37: omp_if.else: -// CHECK37-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] -// CHECK37: omp.inner.for.cond8: -// CHECK37-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK37-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK37-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK37-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END18:%.*]] -// CHECK37: omp.inner.for.body10: -// CHECK37-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK37-NEXT: [[MUL11:%.*]] = mul nsw i32 [[TMP22]], 1 -// CHECK37-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] -// CHECK37-NEXT: store i32 [[ADD12]], i32* [[I4]], align 4 -// CHECK37-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK37-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK37-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM13]] -// CHECK37-NEXT: store i32 0, i32* [[ARRAYIDX14]], align 4 -// CHECK37-NEXT: br label [[OMP_BODY_CONTINUE15:%.*]] -// CHECK37: omp.body.continue15: -// CHECK37-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] -// CHECK37: omp.inner.for.inc16: -// CHECK37-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK37-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK37-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK37-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP9:![0-9]+]] -// CHECK37: omp.inner.for.end18: +// CHECK37-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] +// CHECK37: omp.inner.for.cond9: +// CHECK37-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK37-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK37-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK37-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END19:%.*]] +// CHECK37: omp.inner.for.body11: +// CHECK37-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK37-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP28]], 1 +// CHECK37-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] +// CHECK37-NEXT: store i32 [[ADD13]], i32* [[I4]], align 4 +// CHECK37-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK37-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK37-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i64 [[IDXPROM14]] +// CHECK37-NEXT: store i32 0, i32* [[ARRAYIDX15]], align 4 +// CHECK37-NEXT: br label [[OMP_BODY_CONTINUE16:%.*]] +// CHECK37: omp.body.continue16: +// CHECK37-NEXT: br label [[OMP_INNER_FOR_INC17:%.*]] +// CHECK37: omp.inner.for.inc17: +// CHECK37-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK37-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK37-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4 +// CHECK37-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK37: omp.inner.for.end19: // CHECK37-NEXT: br label [[OMP_IF_END]] // CHECK37: omp_if.end: // CHECK37-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK37: omp.loop.exit: -// CHECK37-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK37-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK37-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK37-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK37-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK37-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK37-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK37-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK37-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK37-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK37-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK37-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK37: .omp.final.then: -// CHECK37-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK37-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP29]], 0 -// CHECK37-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 -// CHECK37-NEXT: [[MUL21:%.*]] = mul nsw i32 [[DIV20]], 1 -// CHECK37-NEXT: [[ADD22:%.*]] = add nsw i32 0, [[MUL21]] -// CHECK37-NEXT: store i32 [[ADD22]], i32* [[I4]], align 4 +// CHECK37-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK37-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP35]], 0 +// CHECK37-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK37-NEXT: [[MUL22:%.*]] = mul nsw i32 [[DIV21]], 1 +// CHECK37-NEXT: [[ADD23:%.*]] = add nsw i32 0, [[MUL22]] +// CHECK37-NEXT: store i32 [[ADD23]], i32* [[I4]], align 4 // CHECK37-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK37: .omp.final.done: // CHECK37-NEXT: br label [[OMP_PRECOND_END]] @@ -4032,6 +4126,7 @@ // CHECK37-NEXT: [[TE_ADDR:%.*]] = alloca i64, align 8 // CHECK37-NEXT: [[TH_ADDR:%.*]] = alloca i64, align 8 // CHECK37-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK37-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK37-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) // CHECK37-NEXT: store i64 [[TE]], i64* [[TE_ADDR]], align 8 // CHECK37-NEXT: store i64 [[TH]], i64* [[TH_ADDR]], align 8 @@ -4042,16 +4137,18 @@ // CHECK37-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK37-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK37-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK37-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP1]]) +// CHECK37-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK37-NEXT: store [10 x i32]* [[TMP1]], [10 x i32]** [[TMP4]], align 8 +// CHECK37-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK37-NEXT: ret void // // // CHECK37-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK37-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK37-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK37-NEXT: entry: // CHECK37-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK37-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK37-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK37-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK37-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK37-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK37-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4061,58 +4158,60 @@ // CHECK37-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK37-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK37-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK37-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK37-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK37-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK37-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK37-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK37-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK37-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK37-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK37-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK37-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK37-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK37-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK37-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK37-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK37-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK37-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK37-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK37-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK37-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK37-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK37-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK37: cond.true: // CHECK37-NEXT: br label [[COND_END:%.*]] // CHECK37: cond.false: -// CHECK37-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK37-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK37-NEXT: br label [[COND_END]] // CHECK37: cond.end: -// CHECK37-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK37-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK37-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK37-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK37-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK37-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK37-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK37-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK37: omp.inner.for.cond: -// CHECK37-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK37-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 -// CHECK37-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK37-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK37-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK37-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK37-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK37: omp.inner.for.body: -// CHECK37-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK37-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK37-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK37-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK37-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK37-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK37-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 -// CHECK37-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK37-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK37-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 +// CHECK37-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK37-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK37-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK37-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK37: omp.body.continue: // CHECK37-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK37: omp.inner.for.inc: -// CHECK37-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK37-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK37-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK37-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK37-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK37-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK37: omp.inner.for.end: // CHECK37-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK37: omp.loop.exit: -// CHECK37-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK37-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK37-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK37-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK37-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK37-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK37-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK37-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK37: .omp.final.then: // CHECK37-NEXT: store i32 10, i32* [[I]], align 4 // CHECK37-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4236,7 +4335,7 @@ // CHECK39-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK39-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK39-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK39-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK39-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK39-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 // CHECK39-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK39-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -4247,25 +4346,28 @@ // CHECK39-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK39-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK39-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK39-NEXT: [[TMP3:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 -// CHECK39-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP3]] to i1 -// CHECK39-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK39-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK39-NEXT: store i32* [[N_ADDR]], i32** [[TMP3]], align 4 +// CHECK39-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK39-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK39-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK39-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK39-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK39-NEXT: [[TMP7:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK39-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP7]] to i1 // CHECK39-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK39-NEXT: store i8 [[FROMBOOL2]], i8* [[CONV]], align 1 -// CHECK39-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK39-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]]) +// CHECK39-NEXT: store i8 [[FROMBOOL2]], i8* [[TMP6]], align 4 +// CHECK39-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK39-NEXT: ret void // // // CHECK39-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK39-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK39-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK39-NEXT: entry: // CHECK39-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK39-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK39-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK39-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK39-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK39-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK39-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK39-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK39-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK39-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK39-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -4278,118 +4380,123 @@ // CHECK39-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK39-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK39-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK39-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK39-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK39-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK39-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK39-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK39-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK39-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK39-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK39-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK39-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK39-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK39-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK39-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK39-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK39-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK39-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK39-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK39-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK39-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK39-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK39-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK39-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP7]], align 4 +// CHECK39-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 +// CHECK39-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK39-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK39-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK39-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK39-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK39-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK39-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK39-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK39-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK39-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK39-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK39-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK39-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK39-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK39-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK39: omp.precond.then: // CHECK39-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK39-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK39-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK39-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK39-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_UB]], align 4 // CHECK39-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK39-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK39-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK39-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK39-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK39-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK39-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK39-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK39-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK39-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK39-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK39-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK39-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK39-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK39-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK39: cond.true: -// CHECK39-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK39-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK39-NEXT: br label [[COND_END:%.*]] // CHECK39: cond.false: -// CHECK39-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK39-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK39-NEXT: br label [[COND_END]] // CHECK39: cond.end: -// CHECK39-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK39-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK39-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK39-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK39-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK39-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK39-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP14]] to i1 -// CHECK39-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK39-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK39-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 +// CHECK39-NEXT: [[TMP20:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK39-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP20]] to i1 +// CHECK39-NEXT: br i1 [[TOBOOL6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK39: omp_if.then: // CHECK39-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK39: omp.inner.for.cond: -// CHECK39-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK39-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 -// CHECK39-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK39-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK39-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK39-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK39-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK39-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK39: omp.inner.for.body: -// CHECK39-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK39-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK39-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK39-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK39-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK39-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !6 -// CHECK39-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !6 -// CHECK39-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP18]] +// CHECK39-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !6 +// CHECK39-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 [[TMP24]] // CHECK39-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 // CHECK39-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK39: omp.body.continue: // CHECK39-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK39: omp.inner.for.inc: -// CHECK39-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK39-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK39-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK39-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK39-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK39-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK39-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK39: omp.inner.for.end: // CHECK39-NEXT: br label [[OMP_IF_END:%.*]] // CHECK39: omp_if.else: -// CHECK39-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] -// CHECK39: omp.inner.for.cond8: -// CHECK39-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK39-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK39-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK39-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END17:%.*]] -// CHECK39: omp.inner.for.body10: -// CHECK39-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK39-NEXT: [[MUL11:%.*]] = mul nsw i32 [[TMP22]], 1 -// CHECK39-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] -// CHECK39-NEXT: store i32 [[ADD12]], i32* [[I4]], align 4 -// CHECK39-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4 -// CHECK39-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP23]] -// CHECK39-NEXT: store i32 0, i32* [[ARRAYIDX13]], align 4 -// CHECK39-NEXT: br label [[OMP_BODY_CONTINUE14:%.*]] -// CHECK39: omp.body.continue14: -// CHECK39-NEXT: br label [[OMP_INNER_FOR_INC15:%.*]] -// CHECK39: omp.inner.for.inc15: -// CHECK39-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK39-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK39-NEXT: store i32 [[ADD16]], i32* [[DOTOMP_IV]], align 4 -// CHECK39-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP10:![0-9]+]] -// CHECK39: omp.inner.for.end17: +// CHECK39-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] +// CHECK39: omp.inner.for.cond9: +// CHECK39-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK39-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK39-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK39-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END18:%.*]] +// CHECK39: omp.inner.for.body11: +// CHECK39-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK39-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP28]], 1 +// CHECK39-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] +// CHECK39-NEXT: store i32 [[ADD13]], i32* [[I4]], align 4 +// CHECK39-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK39-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 [[TMP29]] +// CHECK39-NEXT: store i32 0, i32* [[ARRAYIDX14]], align 4 +// CHECK39-NEXT: br label [[OMP_BODY_CONTINUE15:%.*]] +// CHECK39: omp.body.continue15: +// CHECK39-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] +// CHECK39: omp.inner.for.inc16: +// CHECK39-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK39-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK39-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 +// CHECK39-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK39: omp.inner.for.end18: // CHECK39-NEXT: br label [[OMP_IF_END]] // CHECK39: omp_if.end: // CHECK39-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK39: omp.loop.exit: -// CHECK39-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK39-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK39-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) -// CHECK39-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK39-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK39-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK39-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK39-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK39-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK39-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK39-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK39-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK39: .omp.final.then: -// CHECK39-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK39-NEXT: [[SUB18:%.*]] = sub nsw i32 [[TMP29]], 0 -// CHECK39-NEXT: [[DIV19:%.*]] = sdiv i32 [[SUB18]], 1 -// CHECK39-NEXT: [[MUL20:%.*]] = mul nsw i32 [[DIV19]], 1 -// CHECK39-NEXT: [[ADD21:%.*]] = add nsw i32 0, [[MUL20]] -// CHECK39-NEXT: store i32 [[ADD21]], i32* [[I4]], align 4 +// CHECK39-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK39-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP35]], 0 +// CHECK39-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 +// CHECK39-NEXT: [[MUL21:%.*]] = mul nsw i32 [[DIV20]], 1 +// CHECK39-NEXT: [[ADD22:%.*]] = add nsw i32 0, [[MUL21]] +// CHECK39-NEXT: store i32 [[ADD22]], i32* [[I4]], align 4 // CHECK39-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK39: .omp.final.done: // CHECK39-NEXT: br label [[OMP_PRECOND_END]] @@ -4463,6 +4570,7 @@ // CHECK39-NEXT: [[TE_ADDR:%.*]] = alloca i32, align 4 // CHECK39-NEXT: [[TH_ADDR:%.*]] = alloca i32, align 4 // CHECK39-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK39-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK39-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) // CHECK39-NEXT: store i32 [[TE]], i32* [[TE_ADDR]], align 4 // CHECK39-NEXT: store i32 [[TH]], i32* [[TH_ADDR]], align 4 @@ -4471,16 +4579,18 @@ // CHECK39-NEXT: [[TMP2:%.*]] = load i32, i32* [[TE_ADDR]], align 4 // CHECK39-NEXT: [[TMP3:%.*]] = load i32, i32* [[TH_ADDR]], align 4 // CHECK39-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK39-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP1]]) +// CHECK39-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK39-NEXT: store [10 x i32]* [[TMP1]], [10 x i32]** [[TMP4]], align 4 +// CHECK39-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK39-NEXT: ret void // // // CHECK39-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK39-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK39-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK39-NEXT: entry: // CHECK39-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK39-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK39-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK39-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK39-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK39-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK39-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4490,57 +4600,59 @@ // CHECK39-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK39-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK39-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK39-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK39-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK39-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK39-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK39-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK39-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK39-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK39-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK39-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK39-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK39-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK39-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK39-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK39-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK39-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK39-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK39-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK39-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK39-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK39-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK39-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK39: cond.true: // CHECK39-NEXT: br label [[COND_END:%.*]] // CHECK39: cond.false: -// CHECK39-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK39-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK39-NEXT: br label [[COND_END]] // CHECK39: cond.end: -// CHECK39-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK39-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK39-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK39-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK39-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK39-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK39-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK39-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK39: omp.inner.for.cond: -// CHECK39-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK39-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 -// CHECK39-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK39-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK39-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK39-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK39-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK39: omp.inner.for.body: -// CHECK39-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK39-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK39-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK39-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK39-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK39-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 -// CHECK39-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 -// CHECK39-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK39-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 +// CHECK39-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i32 0, i32 [[TMP11]] // CHECK39-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK39-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK39: omp.body.continue: // CHECK39-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK39: omp.inner.for.inc: -// CHECK39-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK39-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK39-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK39-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK39-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK39-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK39: omp.inner.for.end: // CHECK39-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK39: omp.loop.exit: -// CHECK39-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK39-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK39-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK39-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK39-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK39-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK39-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK39-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK39: .omp.final.then: // CHECK39-NEXT: store i32 10, i32* [[I]], align 4 // CHECK39-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp @@ -147,18 +147,21 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -170,71 +173,73 @@ // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 56087, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 456 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 // CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK1-NEXT: store i32 [[ADD6]], i32* [[J]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], [123 x [456 x i32]]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [456 x i32], [456 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX8]], align 4, !llvm.access.group !4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: store i32 456, i32* [[J]], align 4 @@ -299,18 +304,21 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -322,69 +330,71 @@ // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 56087, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 456 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK3-NEXT: store i32 [[ADD6]], i32* [[J]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], [123 x [456 x i32]]* [[A]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], [456 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], [123 x [456 x i32]]* [[A]], i32 0, i32 [[TMP13]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], [456 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP14]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX7]], align 4, !llvm.access.group !5 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, i32* [[I]], align 4 // CHECK3-NEXT: store i32 456, i32* [[J]], align 4 @@ -676,6 +686,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[M]], i64* [[M_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -686,169 +697,177 @@ // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i64, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]], i32* [[CONV3]], i64 [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32* [[CONV3]], i32** [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP1]], i64* [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32* [[TMP2]], i32** [[TMP7]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[M]], i32** [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[M_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 // CHECK9-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: store i64 [[TMP17]], i64* [[DOTOMP_UB]], align 8 // CHECK9-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP18]], i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP24]], i64* [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP22]], 0 -// CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK9-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] -// CHECK9-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 -// CHECK9-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP21]], [[CONV18]] -// CHECK9-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] -// CHECK9-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK9-NEXT: store i32 [[CONV21]], i32* [[I11]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK9-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 -// CHECK9-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] -// CHECK9-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 -// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP24]], [[CONV25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP26]], 0 -// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 -// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] -// CHECK9-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 -// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] -// CHECK9-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP23]], [[MUL31]] -// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 -// CHECK9-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] -// CHECK9-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 -// CHECK9-NEXT: store i32 [[CONV35]], i32* [[J12]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I11]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[TMP28:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP3]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i64 [[TMP28]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[J12]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK9-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i64 [[IDXPROM36]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX37]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK9-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK9-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP27]], [[CONV16]] +// CHECK9-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK9-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK9-NEXT: store i32 [[CONV19]], i32* [[I9]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK9-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK9-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP30]], [[CONV23]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK9-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] +// CHECK9-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK9-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP29]], [[MUL29]] +// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 +// CHECK9-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] +// CHECK9-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK9-NEXT: store i32 [[CONV33]], i32* [[J10]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[I9]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[TMP34:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP8]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[J10]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[IDXPROM34:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK9-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i64 [[IDXPROM34]] +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX35]], align 4, !llvm.access.group !5 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP30]], 1 -// CHECK9-NEXT: store i64 [[ADD38]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 [[TMP36]], 1 +// CHECK9-NEXT: store i64 [[ADD36]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 -// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK9-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB39:%.*]] = sub nsw i32 [[TMP35]], 0 -// CHECK9-NEXT: [[DIV40:%.*]] = sdiv i32 [[SUB39]], 1 -// CHECK9-NEXT: [[MUL41:%.*]] = mul nsw i32 [[DIV40]], 1 -// CHECK9-NEXT: [[ADD42:%.*]] = add nsw i32 0, [[MUL41]] -// CHECK9-NEXT: store i32 [[ADD42]], i32* [[I11]], align 4 -// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB43:%.*]] = sub nsw i32 [[TMP36]], 0 -// CHECK9-NEXT: [[DIV44:%.*]] = sdiv i32 [[SUB43]], 1 -// CHECK9-NEXT: [[MUL45:%.*]] = mul nsw i32 [[DIV44]], 1 -// CHECK9-NEXT: [[ADD46:%.*]] = add nsw i32 0, [[MUL45]] -// CHECK9-NEXT: store i32 [[ADD46]], i32* [[J12]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB37:%.*]] = sub nsw i32 [[TMP41]], 0 +// CHECK9-NEXT: [[DIV38:%.*]] = sdiv i32 [[SUB37]], 1 +// CHECK9-NEXT: [[MUL39:%.*]] = mul nsw i32 [[DIV38]], 1 +// CHECK9-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL39]] +// CHECK9-NEXT: store i32 [[ADD40]], i32* [[I9]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB41:%.*]] = sub nsw i32 [[TMP42]], 0 +// CHECK9-NEXT: [[DIV42:%.*]] = sdiv i32 [[SUB41]], 1 +// CHECK9-NEXT: [[MUL43:%.*]] = mul nsw i32 [[DIV42]], 1 +// CHECK9-NEXT: [[ADD44:%.*]] = add nsw i32 0, [[MUL43]] +// CHECK9-NEXT: store i32 [[ADD44]], i32* [[J10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -892,18 +911,21 @@ // CHECK9-SAME: ([10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x [2 x i32]]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [10 x [2 x i32]]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [10 x [2 x i32]]* [[TMP0]], [10 x [2 x i32]]** [[TMP1]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -915,70 +937,72 @@ // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 19, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 2 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 // CHECK9-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK9-NEXT: store i32 [[ADD6]], i32* [[J]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP2]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP14]] to i64 // CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX8]], align 4, !llvm.access.group !11 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, i32* [[I]], align 4 // CHECK9-NEXT: store i32 2, i32* [[J]], align 4 @@ -1127,6 +1151,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[M]], i32* [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -1135,167 +1160,175 @@ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32, i32, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32* [[M_ADDR]], i32 [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[M_ADDR]], i32** [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], i32* [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32* [[TMP2]], i32** [[TMP7]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[M]], i32** [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[M_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 // CHECK11-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP17]], i64* [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP18]], i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP24]], i64* [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP22]], 0 -// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK11-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] -// CHECK11-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 -// CHECK11-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP21]], [[CONV18]] -// CHECK11-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] -// CHECK11-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK11-NEXT: store i32 [[CONV21]], i32* [[I11]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK11-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 -// CHECK11-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] -// CHECK11-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 -// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP24]], [[CONV25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP26]], 0 -// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 -// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] -// CHECK11-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 -// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] -// CHECK11-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP23]], [[MUL31]] -// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 -// CHECK11-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] -// CHECK11-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 -// CHECK11-NEXT: store i32 [[CONV35]], i32* [[J12]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I11]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP28:%.*]] = mul nsw i32 [[TMP27]], [[TMP3]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 [[TMP28]] -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[J12]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i32 [[TMP29]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX36]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK11-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK11-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK11-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK11-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP27]], [[CONV16]] +// CHECK11-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK11-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK11-NEXT: store i32 [[CONV19]], i32* [[I9]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK11-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK11-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK11-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK11-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP30]], [[CONV23]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK11-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] +// CHECK11-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK11-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP29]], [[MUL29]] +// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 +// CHECK11-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] +// CHECK11-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK11-NEXT: store i32 [[CONV33]], i32* [[J10]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[I9]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP34:%.*]] = mul nsw i32 [[TMP33]], [[TMP8]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[J10]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i32 [[TMP35]] +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX34]], align 4, !llvm.access.group !6 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[ADD37:%.*]] = add nsw i64 [[TMP30]], 1 -// CHECK11-NEXT: store i64 [[ADD37]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[ADD35:%.*]] = add nsw i64 [[TMP36]], 1 +// CHECK11-NEXT: store i64 [[ADD35]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 -// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]]) +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK11-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB38:%.*]] = sub nsw i32 [[TMP35]], 0 -// CHECK11-NEXT: [[DIV39:%.*]] = sdiv i32 [[SUB38]], 1 -// CHECK11-NEXT: [[MUL40:%.*]] = mul nsw i32 [[DIV39]], 1 -// CHECK11-NEXT: [[ADD41:%.*]] = add nsw i32 0, [[MUL40]] -// CHECK11-NEXT: store i32 [[ADD41]], i32* [[I11]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB42:%.*]] = sub nsw i32 [[TMP36]], 0 -// CHECK11-NEXT: [[DIV43:%.*]] = sdiv i32 [[SUB42]], 1 -// CHECK11-NEXT: [[MUL44:%.*]] = mul nsw i32 [[DIV43]], 1 -// CHECK11-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL44]] -// CHECK11-NEXT: store i32 [[ADD45]], i32* [[J12]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB36:%.*]] = sub nsw i32 [[TMP41]], 0 +// CHECK11-NEXT: [[DIV37:%.*]] = sdiv i32 [[SUB36]], 1 +// CHECK11-NEXT: [[MUL38:%.*]] = mul nsw i32 [[DIV37]], 1 +// CHECK11-NEXT: [[ADD39:%.*]] = add nsw i32 0, [[MUL38]] +// CHECK11-NEXT: store i32 [[ADD39]], i32* [[I9]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB40:%.*]] = sub nsw i32 [[TMP42]], 0 +// CHECK11-NEXT: [[DIV41:%.*]] = sdiv i32 [[SUB40]], 1 +// CHECK11-NEXT: [[MUL42:%.*]] = mul nsw i32 [[DIV41]], 1 +// CHECK11-NEXT: [[ADD43:%.*]] = add nsw i32 0, [[MUL42]] +// CHECK11-NEXT: store i32 [[ADD43]], i32* [[J10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: br label [[OMP_PRECOND_END]] @@ -1339,18 +1372,21 @@ // CHECK11-SAME: ([10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x [2 x i32]]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [10 x [2 x i32]]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [10 x [2 x i32]]* [[TMP0]], [10 x [2 x i32]]** [[TMP1]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [2 x i32]]* noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x [2 x i32]]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1362,68 +1398,70 @@ // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [10 x [2 x i32]]* [[A]], [10 x [2 x i32]]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x [2 x i32]]*, [10 x [2 x i32]]** [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 19, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 2 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 // CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK11-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK11-NEXT: store i32 [[ADD6]], i32* [[J]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP12]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP2]], i32 0, i32 [[TMP13]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP14]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX7]], align 4, !llvm.access.group !12 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK11-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, i32* [[I]], align 4 // CHECK11-NEXT: store i32 2, i32* [[J]], align 4 diff --git a/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp @@ -224,18 +224,21 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -245,59 +248,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -309,18 +314,21 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -330,59 +338,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -394,18 +404,21 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -415,76 +428,78 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -592,18 +607,21 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -613,58 +631,60 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !7 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !7 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP9]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !7 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -676,18 +696,21 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -697,58 +720,60 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP9]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -760,18 +785,21 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -781,75 +809,77 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP13]] // CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1331,24 +1361,29 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1361,82 +1396,84 @@ // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK9-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -1454,24 +1491,29 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1484,82 +1526,84 @@ // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK9-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -1578,7 +1622,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 @@ -1587,23 +1631,26 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], i64* [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -1616,102 +1663,105 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !18 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !18 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK9-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK9-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP30]], 0 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK9-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1 // CHECK9-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1 // CHECK9-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] @@ -1802,18 +1852,21 @@ // CHECK9-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1823,58 +1876,60 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK9-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1886,18 +1941,21 @@ // CHECK9-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1907,58 +1965,60 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK9-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1970,18 +2030,21 @@ // CHECK9-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1991,75 +2054,77 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 10) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 10) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK9-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2293,23 +2358,28 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2322,81 +2392,83 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP17]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 [[TMP21]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -2414,23 +2486,28 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP1]], i32** [[TMP4]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2443,81 +2520,83 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !16 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP17]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 [[TMP21]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -2536,7 +2615,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 @@ -2544,22 +2623,26 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[N_ADDR]], i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32* [[TMP1]], i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[TMP6]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2572,100 +2655,104 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP20]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 [[TMP26]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK11-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP30]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1 // CHECK11-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1 // CHECK11-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] @@ -2756,18 +2843,21 @@ // CHECK11-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2777,57 +2867,59 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i32 0, i32 [[TMP11]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2839,18 +2931,21 @@ // CHECK11-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2860,57 +2955,59 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i32 0, i32 [[TMP11]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2922,18 +3019,21 @@ // CHECK11-SAME: ([10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [10 x i32]* [[TMP0]], [10 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2943,74 +3043,76 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 10) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 10) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !28 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !28 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP2]], i32 0, i32 [[TMP13]] // CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp @@ -336,8 +336,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 @@ -348,154 +347,160 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i64 [[TMP4]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP2]], i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S]* [[TMP6]] to %struct.S* +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP8]], %struct.St* noundef [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i64 0, i64 [[IDXPROM8]] -// CHECK1-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX9]] to i8* -// CHECK1-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR5]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false), !llvm.access.group !5 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP23]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast %struct.S* [[ARRAYIDX5]] to i8* +// CHECK1-NEXT: [[TMP27:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 4, i1 false), !llvm.access.group !5 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP28]] +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[SIVAR]], align 4, !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK1-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN12]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP27]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP35]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done13: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done9: // CHECK1-NEXT: ret void // // @@ -696,7 +701,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 @@ -706,24 +711,27 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK1-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i64 [[TMP4]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP5]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store [2 x %struct.S.0]* [[TMP1]], [2 x %struct.S.0]** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP7]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -731,125 +739,128 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca %struct.S.0*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP7]], align 8 +// CHECK1-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP9]], i8* align 4 [[TMP10]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [2 x %struct.S.0]* [[TMP6]] to %struct.S.0* +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP12]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP11]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.St* noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: [[TMP13:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP13]], %struct.St* noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP16]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP18:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i64 0, i64 [[IDXPROM9]] -// CHECK1-NEXT: [[TMP20:%.*]] = bitcast %struct.S.0* [[ARRAYIDX10]] to i8* -// CHECK1-NEXT: [[TMP21:%.*]] = bitcast %struct.S.0* [[TMP18]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false), !llvm.access.group !11 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP24:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP4]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast %struct.S.0* [[ARRAYIDX7]] to i8* +// CHECK1-NEXT: [[TMP27:%.*]] = bitcast %struct.S.0* [[TMP24]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 4, i1 false), !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN12]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN9]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP27]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP33]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done13: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done10: // CHECK1-NEXT: ret void // // @@ -1140,8 +1151,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK3-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 @@ -1150,148 +1160,158 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32 [[TMP4]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP2]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC1]] to i8* -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK3-NEXT: [[TMP12:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S]* [[TMP6]] to %struct.S* +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP8]], %struct.St* noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC1]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 [[TMP17]] -// CHECK3-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* -// CHECK3-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR4]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false), !llvm.access.group !6 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP24]] +// CHECK3-NEXT: store i32 [[TMP23]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP25]] +// CHECK3-NEXT: [[TMP26:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* +// CHECK3-NEXT: [[TMP27:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 4, i1 false), !llvm.access.group !6 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP29]], [[TMP28]] +// CHECK3-NEXT: store i32 [[ADD5]], i32* [[SIVAR]], align 4, !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]]) +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK3-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN10]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP27]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP35]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done11: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done8: // CHECK3-NEXT: ret void // // @@ -1491,7 +1511,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK3-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 @@ -1500,23 +1520,27 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 // CHECK3-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32 [[TMP4]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP5]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store [2 x %struct.S.0]* [[TMP1]], [2 x %struct.S.0]** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP7]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1524,122 +1548,126 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: [[_TMP7:%.*]] = alloca %struct.S.0*, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca %struct.S.0*, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP7]], align 4 +// CHECK3-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: [[TMP9:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK3-NEXT: [[TMP10:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 [[TMP10]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = bitcast [2 x %struct.S.0]* [[TMP6]] to %struct.S.0* +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP12]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP11]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP7]], %struct.St* noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: [[TMP13:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP13]], %struct.St* noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(%struct.St* noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP17]] -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP18:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 [[TMP19]] -// CHECK3-NEXT: [[TMP20:%.*]] = bitcast %struct.S.0* [[ARRAYIDX9]] to i8* -// CHECK3-NEXT: [[TMP21:%.*]] = bitcast %struct.S.0* [[TMP18]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false), !llvm.access.group !12 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP23]] +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP24:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP4]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP25]] +// CHECK3-NEXT: [[TMP26:%.*]] = bitcast %struct.S.0* [[ARRAYIDX6]] to i8* +// CHECK3-NEXT: [[TMP27:%.*]] = bitcast %struct.S.0* [[TMP24]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 4, i1 false), !llvm.access.group !12 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP24]]) -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]]) +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK3-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN11]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN8]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP27]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP33]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done12: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done9: // CHECK3-NEXT: ret void // // @@ -2520,9 +2548,7 @@ // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK9-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 // CHECK9-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 @@ -2530,34 +2556,31 @@ // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK9-NEXT: store i32* [[CONV2]], i32** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load volatile i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* @g, align 4 +// CHECK9-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]]) +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK9-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 @@ -2566,71 +2589,76 @@ // CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[G_ADDR]] to i32* -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[G]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[G1]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], i32* [[SIVAR]], align 4 +// CHECK9-NEXT: store i32* [[G1]], i32** [[TMP]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] -// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: store i32 1, i32* [[CONV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !4 -// CHECK9-NEXT: store volatile i32 1, i32* [[TMP8]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: store i32 2, i32* [[CONV2]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP9]], align 8, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !4 -// CHECK9-NEXT: store i32* [[TMP11]], i32** [[TMP10]], align 8, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store i32* [[CONV2]], i32** [[TMP12]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: store i32 1, i32* [[G]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: store volatile i32 1, i32* [[TMP15]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: store i32 2, i32* [[SIVAR]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[G]], i32** [[TMP16]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: store i32* [[TMP18]], i32** [[TMP17]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[TMP19]], align 8, !llvm.access.group !4 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group !4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK9-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp @@ -163,6 +163,7 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], i64* [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], i64* [[SVAR_ADDR]], align 8 @@ -172,20 +173,25 @@ // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SFVAR_ADDR]] to float* // CHECK1-NEXT: store double* [[CONV1]], double** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, double*, double*, i32*, float*)* @.omp_outlined. to void (i32*, i32*, ...)*), double* [[CONV]], double* [[TMP0]], i32* [[CONV2]], float* [[CONV3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store double* [[CONV]], double** [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[TMP2]], double** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[CONV2]], i32** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[CONV3]], float** [[TMP4]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[G:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -193,104 +199,106 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP4:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store double* [[G]], double** [[G_ADDR]], align 8 -// CHECK1-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store float* [[SFVAR]], float** [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float*, float** [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store double* [[TMP1]], double** [[TMP]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 8 +// CHECK1-NEXT: store double* [[TMP4]], double** [[TMP]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: store double* [[G13]], double** [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load double*, double** [[TMP]], align 8 +// CHECK1-NEXT: store double* [[G1]], double** [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: store double 1.000000e+00, double* [[G2]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP13:%.*]] = load double*, double** [[_TMP4]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP13]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: store i32 3, i32* [[SVAR5]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: store float 4.000000e+00, float* [[SFVAR6]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double* [[G2]], double** [[TMP14]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP16:%.*]] = load double*, double** [[_TMP4]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: store double* [[TMP16]], double** [[TMP15]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store i32* [[SVAR5]], i32** [[TMP17]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store float* [[SFVAR6]], float** [[TMP18]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: store double 1.000000e+00, double* [[G]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP18:%.*]] = load double*, double** [[_TMP2]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP18]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: store i32 3, i32* [[SVAR]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: store float 4.000000e+00, float* [[SFVAR]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store double* [[G]], double** [[TMP19]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load double*, double** [[_TMP2]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: store double* [[TMP21]], double** [[TMP20]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[SVAR]], i32** [[TMP22]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store float* [[SFVAR]], float** [[TMP23]], align 8, !llvm.access.group !4 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP24:%.*]] = load double, double* [[G2]], align 8 -// CHECK1-NEXT: store volatile double [[TMP24]], double* [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load double*, double** [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load double, double* [[TMP25]], align 8 -// CHECK1-NEXT: store volatile double [[TMP26]], double* [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[SVAR5]], align 4 -// CHECK1-NEXT: store i32 [[TMP27]], i32* [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load float, float* [[SFVAR6]], align 4 -// CHECK1-NEXT: store float [[TMP28]], float* [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load double, double* [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP29]], double* [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load double*, double** [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load double, double* [[TMP30]], align 8 +// CHECK1-NEXT: store volatile double [[TMP31]], double* [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP32]], i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load float, float* [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP33]], float* [[TMP8]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -332,6 +340,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 // CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], i32* [[SVAR_ADDR]], align 4 @@ -346,20 +355,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load double, double* [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], double* [[G13]], align 8 // CHECK3-NEXT: store double* [[G13]], double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, double*, double*, i32*, float*)* @.omp_outlined. to void (i32*, i32*, ...)*), double* [[G2]], double* [[TMP5]], i32* [[SVAR_ADDR]], float* [[CONV]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G2]], double** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load double*, double** [[_TMP4]], align 4 +// CHECK3-NEXT: store double* [[TMP7]], double** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SVAR_ADDR]], i32** [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[CONV]], float** [[TMP9]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca float*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -367,104 +381,106 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store double* [[G]], double** [[G_ADDR]], align 4 -// CHECK3-NEXT: store double* [[G1]], double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store float* [[SFVAR]], float** [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load float*, float** [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store double* [[TMP1]], double** [[TMP]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float*, float** [[TMP7]], align 4 +// CHECK3-NEXT: store double* [[TMP4]], double** [[TMP]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP]], align 4 -// CHECK3-NEXT: store double* [[G13]], double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load double*, double** [[TMP]], align 4 +// CHECK3-NEXT: store double* [[G1]], double** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: store double 1.000000e+00, double* [[G2]], align 8, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP13:%.*]] = load double*, double** [[_TMP4]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP13]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: store i32 3, i32* [[SVAR5]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: store float 4.000000e+00, float* [[SFVAR6]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double* [[G2]], double** [[TMP14]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP16:%.*]] = load double*, double** [[_TMP4]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: store double* [[TMP16]], double** [[TMP15]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store i32* [[SVAR5]], i32** [[TMP17]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store float* [[SFVAR6]], float** [[TMP18]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store double 1.000000e+00, double* [[G]], align 8, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP18:%.*]] = load double*, double** [[_TMP2]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP18]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store i32 3, i32* [[SVAR]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store float 4.000000e+00, float* [[SFVAR]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store double* [[G]], double** [[TMP19]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load double*, double** [[_TMP2]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store double* [[TMP21]], double** [[TMP20]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[SVAR]], i32** [[TMP22]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store float* [[SFVAR]], float** [[TMP23]], align 4, !llvm.access.group !5 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !5 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK3-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP24:%.*]] = load double, double* [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP24]], double* [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP25:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load double, double* [[TMP25]], align 4 -// CHECK3-NEXT: store volatile double [[TMP26]], double* [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP27]], i32* [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load float, float* [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP28]], float* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load double, double* [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP29]], double* [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP30:%.*]] = load double*, double** [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load double, double* [[TMP30]], align 4 +// CHECK3-NEXT: store volatile double [[TMP31]], double* [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP32]], i32* [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load float, float* [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP33]], float* [[TMP8]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -655,6 +671,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 @@ -666,21 +683,27 @@ // CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK9-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32* [[CONV]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP3]], i32* [[CONV1]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S* [[TMP7]], %struct.S** [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP8]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -688,31 +711,33 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP2:%.*]] = alloca %struct.S*, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 8 +// CHECK9-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -722,115 +747,115 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK9-NEXT: store %struct.S* [[VAR5]], %struct.S** [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP16:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i64 0, i64 [[IDXPROM9]] -// CHECK9-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX10]] to i8* -// CHECK9-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[TMP16]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false), !llvm.access.group !5 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP20]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP22:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK9-NEXT: [[TMP24:%.*]] = bitcast %struct.S* [[ARRAYIDX5]] to i8* +// CHECK9-NEXT: [[TMP25:%.*]] = bitcast %struct.S* [[TMP22]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP24]], i8* align 4 [[TMP25]], i64 4, i1 false), !llvm.access.group !5 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK9-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP27]], i32* [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK9-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP30:%.*]] = bitcast [2 x %struct.S]* [[S_ARR4]] to %struct.S* -// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN12]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN12]], [[TMP31]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP33]], i32* [[TMP4]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK9-NEXT: [[TMP35:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP36:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN7]], [[TMP37]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP30]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK9-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP36]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[TMP38:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK9-NEXT: [[TMP39:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP31]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done13: -// CHECK9-NEXT: [[TMP34:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[TMP5]] to i8* -// CHECK9-NEXT: [[TMP36:%.*]] = bitcast %struct.S* [[TMP34]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP37:%.*]] = load i32, i32* [[SVAR7]], align 4 -// CHECK9-NEXT: store i32 [[TMP37]], i32* [[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP37]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done8: +// CHECK9-NEXT: [[TMP40:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP41:%.*]] = bitcast %struct.S* [[TMP11]] to i8* +// CHECK9-NEXT: [[TMP42:%.*]] = bitcast %struct.S* [[TMP40]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP41]], i8* align 4 [[TMP42]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP43]], i32* [[TMP10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN14]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN9]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP38]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP44]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done15: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done10: // CHECK9-NEXT: ret void // // @@ -1000,6 +1025,7 @@ // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 @@ -1009,20 +1035,25 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK9-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32* [[CONV]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP3]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [2 x %struct.S.0]* [[TMP1]], [2 x %struct.S.0]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.S.0* [[TMP7]], %struct.S.0** [[TMP6]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1030,28 +1061,30 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[_TMP2:%.*]] = alloca %struct.S.0*, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP7]], align 8 +// CHECK9-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -1061,113 +1094,113 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK9-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP9:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP13]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP15:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 8, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i64 0, i64 [[IDXPROM8]] -// CHECK9-NEXT: [[TMP17:%.*]] = bitcast %struct.S.0* [[ARRAYIDX9]] to i8* -// CHECK9-NEXT: [[TMP18:%.*]] = bitcast %struct.S.0* [[TMP15]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP17]], i8* align 4 [[TMP18]], i64 4, i1 false), !llvm.access.group !11 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 8, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK9-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX5]] to i8* +// CHECK9-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false), !llvm.access.group !11 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK9-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK9-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK9-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP26]], i32* [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK9-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP27]], i8* align 4 [[TMP28]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP29:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR4]] to %struct.S.0* -// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN11]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN11]], [[TMP30]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP31]], i32* [[TMP4]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK9-NEXT: [[TMP33:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP34:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR]] to %struct.S.0* +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN7]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN7]], [[TMP35]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP29]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[TMP31:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK9-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP31]], i8* align 4 [[TMP32]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP34]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[TMP36:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK9-NEXT: [[TMP37:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP36]], i8* align 4 [[TMP37]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP30]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done12: -// CHECK9-NEXT: [[TMP33:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP34:%.*]] = bitcast %struct.S.0* [[TMP4]] to i8* -// CHECK9-NEXT: [[TMP35:%.*]] = bitcast %struct.S.0* [[TMP33]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP35]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done8: +// CHECK9-NEXT: [[TMP38:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = bitcast %struct.S.0* [[TMP9]] to i8* +// CHECK9-NEXT: [[TMP40:%.*]] = bitcast %struct.S.0* [[TMP38]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP39]], i8* align 4 [[TMP40]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN13]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN9]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP36]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP41]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done14: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done10: // CHECK9-NEXT: ret void // // @@ -1362,6 +1395,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK11-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 @@ -1371,21 +1405,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 // CHECK11-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32*, [2 x %struct.S]*, %struct.S*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32* [[T_VAR_ADDR]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP3]], i32* [[SVAR_ADDR]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[T_VAR_ADDR]], i32** [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP7]], %struct.S** [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32* [[SVAR_ADDR]], i32** [[TMP8]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1393,31 +1433,33 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca %struct.S*, align 4 -// CHECK11-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca %struct.S*, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[SVAR]], i32** [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S* [[TMP3]], %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP9]], align 4 +// CHECK11-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1427,113 +1469,113 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store %struct.S* [[VAR5]], %struct.S** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP11:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC3]], i32 0, i32 [[TMP15]] -// CHECK11-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP16:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 [[TMP17]] -// CHECK11-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX9]] to i8* -// CHECK11-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[TMP16]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false), !llvm.access.group !6 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP21]] +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP22:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP23]] +// CHECK11-NEXT: [[TMP24:%.*]] = bitcast %struct.S* [[ARRAYIDX4]] to i8* +// CHECK11-NEXT: [[TMP25:%.*]] = bitcast %struct.S* [[TMP22]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP24]], i8* align 4 [[TMP25]], i32 4, i1 false), !llvm.access.group !6 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK11-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP27]], i32* [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK11-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP30:%.*]] = bitcast [2 x %struct.S]* [[S_ARR4]] to %struct.S* -// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN11]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN11]], [[TMP31]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP33]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK11-NEXT: [[TMP35:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP36:%.*]] = bitcast [2 x %struct.S]* [[S_ARR]] to %struct.S* +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN6]], [[TMP37]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP30]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK11-NEXT: [[TMP33:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP36]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[TMP38:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK11-NEXT: [[TMP39:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP31]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done12: -// CHECK11-NEXT: [[TMP34:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[TMP5]] to i8* -// CHECK11-NEXT: [[TMP36:%.*]] = bitcast %struct.S* [[TMP34]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[SVAR7]], align 4 -// CHECK11-NEXT: store i32 [[TMP37]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP37]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP40:%.*]] = load %struct.S*, %struct.S** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = bitcast %struct.S* [[TMP11]] to i8* +// CHECK11-NEXT: [[TMP42:%.*]] = bitcast %struct.S* [[TMP40]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP41]], i8* align 4 [[TMP42]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, i32* [[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP43]], i32* [[TMP10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP38]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP44]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // @@ -1702,6 +1744,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 @@ -1710,20 +1753,25 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 // CHECK11-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32* [[T_VAR_ADDR]], [2 x %struct.S.0]* [[TMP1]], %struct.S.0* [[TMP3]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[T_VAR_ADDR]], i32** [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x %struct.S.0]* [[TMP1]], [2 x %struct.S.0]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.S.0* [[TMP7]], %struct.S.0** [[TMP6]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca %struct.S.0*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1731,28 +1779,30 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca %struct.S.0*, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[TMP3]], %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP7]], align 4 +// CHECK11-NEXT: store %struct.S.0* [[TMP8]], %struct.S.0** [[TMP]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1762,111 +1812,111 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP4:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store %struct.S.0* [[VAR5]], %struct.S.0** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP9:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC3]], i32 0, i32 [[TMP14]] -// CHECK11-NEXT: store i32 [[TMP13]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP15:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 [[TMP16]] -// CHECK11-NEXT: [[TMP17:%.*]] = bitcast %struct.S.0* [[ARRAYIDX8]] to i8* -// CHECK11-NEXT: [[TMP18:%.*]] = bitcast %struct.S.0* [[TMP15]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP17]], i8* align 4 [[TMP18]], i32 4, i1 false), !llvm.access.group !12 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP19]] +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP21]] +// CHECK11-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX4]] to i8* +// CHECK11-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false), !llvm.access.group !12 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]]) -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP26]]) +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, i32* [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK11-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK11-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP26]], i32* [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK11-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP27]], i8* align 4 [[TMP28]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP29:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR4]] to %struct.S.0* -// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN10]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN10]], [[TMP30]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP31]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK11-NEXT: [[TMP33:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP32]], i8* align 4 [[TMP33]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP34:%.*]] = bitcast [2 x %struct.S.0]* [[S_ARR]] to %struct.S.0* +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN6]], [[TMP35]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP29]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[TMP31:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* -// CHECK11-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP31]], i8* align 4 [[TMP32]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP34]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[TMP36:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* +// CHECK11-NEXT: [[TMP37:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 [[TMP37]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP30]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done11: -// CHECK11-NEXT: [[TMP33:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = bitcast %struct.S.0* [[TMP4]] to i8* -// CHECK11-NEXT: [[TMP35:%.*]] = bitcast %struct.S.0* [[TMP33]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP35]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP38:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = bitcast %struct.S.0* [[TMP9]] to i8* +// CHECK11-NEXT: [[TMP40:%.*]] = bitcast %struct.S.0* [[TMP38]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP39]], i8* align 4 [[TMP40]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP36]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP41]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_simd_private_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_private_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_simd_private_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_private_codegen.cpp @@ -257,15 +257,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -280,6 +282,8 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 @@ -295,77 +299,77 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 [[IDXPROM2]] -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.S* [[ARRAYIDX3]] to i8* -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 4, i1 false), !llvm.access.group !5 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[ARRAYIDX3]] to i8* +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 4, i1 false), !llvm.access.group !5 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP14]] // CHECK1-NEXT: store i32 [[ADD4]], i32* [[SIVAR]], align 4, !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i64 2 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN6]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP21]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -445,15 +449,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK1-SAME: () #[[ATTR4]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 8 @@ -469,6 +475,8 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 @@ -486,74 +494,74 @@ // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 8, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP11:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP12]] to i64 // CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 [[IDXPROM4]] -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast %struct.S.0* [[ARRAYIDX5]] to i8* -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[TMP10]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 4, i1 false), !llvm.access.group !11 +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[ARRAYIDX5]] to i8* +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[TMP11]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 4, i1 false), !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]]) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN7]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] @@ -759,15 +767,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94 // CHECK3-SAME: () #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -782,6 +792,8 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 @@ -797,75 +809,75 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP10]] -// CHECK3-NEXT: [[TMP11:%.*]] = bitcast %struct.S* [[ARRAYIDX2]] to i8* -// CHECK3-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 4, i1 false), !llvm.access.group !6 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP10]] +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[TMP12:%.*]] = bitcast %struct.S* [[ARRAYIDX2]] to i8* +// CHECK3-NEXT: [[TMP13:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 4, i1 false), !llvm.access.group !6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], [[TMP14]] // CHECK3-NEXT: store i32 [[ADD3]], i32* [[SIVAR]], align 4, !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN5]], i32 2 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN5]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP21]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] @@ -945,15 +957,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK3-SAME: () #[[ATTR4]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca %struct.S.0*, align 4 @@ -969,6 +983,8 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store %struct.S.0* undef, %struct.S.0** [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 @@ -986,72 +1002,72 @@ // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK3-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: [[TMP12:%.*]] = bitcast %struct.S.0* [[ARRAYIDX4]] to i8* -// CHECK3-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[TMP10]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 4, i1 false), !llvm.access.group !12 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 [[TMP10]] +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP11:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP2]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[ARRAYIDX4]] to i8* +// CHECK3-NEXT: [[TMP14:%.*]] = bitcast %struct.S.0* [[TMP11]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 4, i1 false), !llvm.access.group !12 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]]) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN6]], i32 2 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -2013,18 +2029,20 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK9-NEXT: store i64 [[G1]], i64* [[G1_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 @@ -2040,66 +2058,68 @@ // CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32* undef, i32** [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK9-NEXT: store i32* [[G1]], i32** [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4 // CHECK9-NEXT: store i32 1, i32* [[G]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[_TMP2]], align 8, !llvm.access.group !4 -// CHECK9-NEXT: store volatile i32 1, i32* [[TMP8]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[_TMP2]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: store volatile i32 1, i32* [[TMP9]], align 4, !llvm.access.group !4 // CHECK9-NEXT: store i32 2, i32* [[SIVAR]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store i32* [[G]], i32** [[TMP9]], align 8, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[_TMP2]], align 8, !llvm.access.group !4 -// CHECK9-NEXT: store i32* [[TMP11]], i32** [[TMP10]], align 8, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[TMP12]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[G]], i32** [[TMP10]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[_TMP2]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: store i32* [[TMP12]], i32** [[TMP11]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[TMP13]], align 8, !llvm.access.group !4 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group !4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp @@ -122,19 +122,22 @@ // CHECK1-SAME: (i64 noundef [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -145,83 +148,85 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[SIVAR1]], align 4 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[SIVAR1]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[SIVAR1]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[SIVAR]], align 4, !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i32* [[SIVAR1]] to i8* -// CHECK1-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP18]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP22]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -293,19 +298,22 @@ // CHECK1-SAME: (i64 noundef [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -316,83 +324,85 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[T_VAR1]], align 4 +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR1]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[T_VAR1]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[T_VAR]], align 4, !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, i32* [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i32* [[T_VAR1]] to i8* -// CHECK1-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i32* [[T_VAR]] to i8* +// CHECK1-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP18]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP22]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -468,18 +478,21 @@ // CHECK3-SAME: (i32 noundef [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[SIVAR]], i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[SIVAR_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[SIVAR_ADDR]], i32** [[TMP0]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -490,83 +503,85 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[SIVAR1]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[SIVAR1]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[SIVAR1]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[SIVAR]], align 4, !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = bitcast i32* [[SIVAR1]] to i8* -// CHECK3-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK3-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i32 4, i8* [[TMP18]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP22]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -637,18 +652,21 @@ // CHECK3-SAME: (i32 noundef [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[T_VAR_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[T_VAR_ADDR]], i32** [[TMP0]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -659,83 +677,85 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[T_VAR1]], align 4 +// CHECK3-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[T_VAR]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[T_VAR1]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[T_VAR1]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[T_VAR]], align 4, !llvm.access.group !12 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, i32* [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = bitcast i32* [[T_VAR1]] to i8* -// CHECK3-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 4, i8* [[TMP16]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = bitcast i32* [[T_VAR]] to i8* +// CHECK3-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 4, i8* [[TMP18]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP20]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP22]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -986,19 +1006,22 @@ // CHECK9-SAME: (i64 noundef [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP0]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1010,86 +1033,88 @@ // CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[SIVAR1]], align 4 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[SIVAR]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9: omp.inner.for.body: +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[SIVAR1]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[SIVAR1]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store i32* [[SIVAR1]], i32** [[TMP11]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[SIVAR]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK9-NEXT: store i32 [[ADD2]], i32* [[SIVAR]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[SIVAR]], i32** [[TMP13]], align 8, !llvm.access.group !4 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]), !llvm.access.group !4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, i32* [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK9-NEXT: [[TMP16:%.*]] = bitcast i32* [[SIVAR1]] to i8* -// CHECK9-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP17:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK9-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, i8* [[TMP17]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP18:%.*]] = bitcast i32* [[SIVAR]] to i8* +// CHECK9-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK9-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, i8* [[TMP19]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK9-NEXT: store i32 [[ADD5]], i32* [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: store i32 [[ADD4]], i32* [[TMP2]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.case2: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR1]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = atomicrmw add i32* [[TMP0]], i32 [[TMP21]] monotonic, align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], [8 x i32]* @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = atomicrmw add i32* [[TMP2]], i32 [[TMP23]] monotonic, align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.default: // CHECK9-NEXT: ret void diff --git a/clang/test/OpenMP/teams_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_firstprivate_codegen.cpp --- a/clang/test/OpenMP/teams_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_firstprivate_codegen.cpp @@ -173,44 +173,48 @@ // CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[G1:%.*]] = alloca i32, align 128 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[G_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 128 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[G1]], align 128 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[G1]], i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[G1]], i32** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[G:%.*]], i64 [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1:%.*]] = alloca i32, align 128 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca i32, align 128 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[G_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 128 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[G1]], align 128 -// CHECK1-NEXT: store i32 1, i32* [[G1]], align 128 -// CHECK1-NEXT: store i32 2, i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store i32* [[G1]], i32** [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[CONV]], i32** [[TMP3]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 128 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[G]], align 128 +// CHECK1-NEXT: store i32 1, i32* [[G]], align 128 +// CHECK1-NEXT: store i32 2, i32* [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[G]], i32** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[SIVAR]], i32** [[TMP7]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK1-NEXT: ret void // @@ -238,41 +242,47 @@ // CHECK3-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[G1:%.*]] = alloca i32, align 128 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SIVAR]], i32* [[SIVAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[G_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 128 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[G1]], align 128 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[G1]], i32 [[TMP3]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[G1]], i32** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[G:%.*]], i32 [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G1:%.*]] = alloca i32, align 128 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca i32, align 128 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 128 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[G1]], align 128 -// CHECK3-NEXT: store i32 1, i32* [[G1]], align 128 -// CHECK3-NEXT: store i32 2, i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store i32* [[G1]], i32** [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store i32* [[SIVAR_ADDR]], i32** [[TMP3]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 128 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[G]], align 128 +// CHECK3-NEXT: store i32 1, i32* [[G]], align 128 +// CHECK3-NEXT: store i32 2, i32* [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[G]], i32** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[TMP7]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 4 dereferenceable(8) [[REF_TMP]]) // CHECK3-NEXT: ret void // @@ -439,8 +449,7 @@ // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 // CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 @@ -451,86 +460,92 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i64 [[TMP4]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP2]], i64 [[TMP6]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [2 x i32]* nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 [[T_VAR:%.*]], [2 x %struct.S]* nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 [[SIVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK9-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SIVAR]], i64* [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK9-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK9-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 8 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: [[TMP12:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S]* [[TMP6]] to %struct.S* +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK9-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* [[AGG_TMP]]) // CHECK9-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done4: -// CHECK9-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK9-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S* nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* [[AGG_TMP6]]) -// CHECK9-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 0 -// CHECK9-NEXT: store i32 [[TMP7]], i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i64 0, i64 0 -// CHECK9-NEXT: [[TMP8:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* -// CHECK9-NEXT: [[TMP9:%.*]] = bitcast %struct.S* [[VAR5]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 4, i1 false) -// CHECK9-NEXT: store i32 2, i32* [[CONV1]], align 4 -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done1: +// CHECK9-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK9-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* nonnull align 4 dereferenceable(4) [[TMP8]], %struct.St* [[AGG_TMP2]]) +// CHECK9-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 0 +// CHECK9-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP16:%.*]] = bitcast %struct.S* [[ARRAYIDX3]] to i8* +// CHECK9-NEXT: [[TMP17:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP16]], i8* align 4 [[TMP17]], i64 4, i1 false) +// CHECK9-NEXT: store i32 2, i32* [[SIVAR]], align 4 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN4]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP10]], [[OMP_ARRAYCPY_DONE4]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP18]], [[OMP_ARRAYCPY_DONE1]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done9: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done5: // CHECK9-NEXT: ret void // // @@ -581,27 +596,30 @@ // CHECK9-SAME: (i64 [[T_VAR:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[T_VAR]], align 4 // CHECK9-NEXT: ret void // // @@ -609,26 +627,26 @@ // CHECK9-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 128 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 128 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1]], align 128 // CHECK9-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x i8*], align 8 // CHECK9-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x i8*], align 8 // CHECK9-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x i8*], align 8 // CHECK9-NEXT: [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x i8*], align 8 // CHECK9-NEXT: [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x i8*], align 8 // CHECK9-NEXT: [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x i8*], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: store i32 0, i32* [[T_VAR]], align 128 // CHECK9-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP0]], i8* align 128 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) -// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 signext 1) -// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 signext 2) -// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* nonnull align 4 dereferenceable(4) [[VAR]], i32 signext 3) +// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 signext 1) +// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 signext 2) +// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* nonnull align 4 dereferenceable(4) [[VAR]], i32 signext 3) // CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP2:%.*]] = bitcast i8** [[TMP1]] to i32** // CHECK9-NEXT: store i32* [[T_VAR]], i32** [[TMP2]], align 8 @@ -646,19 +664,19 @@ // CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 // CHECK9-NEXT: store i8* null, i8** [[TMP10]], align 8 // CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK9-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to [2 x %struct.S.0]** -// CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to [2 x %struct.S.1]** +// CHECK9-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP12]], align 8 // CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK9-NEXT: [[TMP14:%.*]] = bitcast i8** [[TMP13]] to [2 x %struct.S.0]** -// CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = bitcast i8** [[TMP13]] to [2 x %struct.S.1]** +// CHECK9-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP14]], align 8 // CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 // CHECK9-NEXT: store i8* null, i8** [[TMP15]], align 8 // CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK9-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to %struct.S.0** -// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to %struct.S.1** +// CHECK9-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[TMP17]], align 8 // CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK9-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to %struct.S.0** -// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[TMP19]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to %struct.S.1** +// CHECK9-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[TMP19]], align 8 // CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 // CHECK9-NEXT: store i8* null, i8** [[TMP20]], align 8 // CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 @@ -667,7 +685,7 @@ // CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK9-NEXT: br i1 [[TMP24]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: -// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l75(i32* [[T_VAR]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l75(i32* [[T_VAR]], [2 x i32]* [[VEC]], [2 x %struct.S.1]* [[S_ARR]], %struct.S.1* [[VAR]]) #[[ATTR4]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: // CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 @@ -688,18 +706,18 @@ // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT5]] // CHECK9: omp_offload.cont5: // CHECK9-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP34]], [[OMP_OFFLOAD_CONT5]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP34]], [[OMP_OFFLOAD_CONT5]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE6:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK9: arraydestroy.done6: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK9-NEXT: ret i32 [[TMP35]] // @@ -786,140 +804,148 @@ // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK9-SAME: (%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK9-SAME: (%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK9-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS1]], i32 signext [[TMP0]]) +// CHECK9-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* nonnull align 4 dereferenceable(4) [[THIS1]], i32 signext [[TMP0]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l75 -// CHECK9-SAME: (i32* nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.1]* nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.1* nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.1]*, align 8 +// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 +// CHECK9-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[S_ARR_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[VAR_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[S_ARR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR_ADDR]], align 8 // CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 128 // CHECK9-NEXT: store i32 [[TMP4]], i32* [[T_VAR1]], align 128 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP1]], i32* [[T_VAR1]], [2 x %struct.S.0]* [[TMP2]], %struct.S.0* [[TMP3]]) +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store [2 x i32]* [[TMP1]], [2 x i32]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i32* [[T_VAR1]], i32** [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store [2 x %struct.S.1]* [[TMP2]], [2 x %struct.S.1]** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store %struct.S.1* [[TMP3]], %struct.S.1** [[TMP8]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [2 x i32]* nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S.0]* nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 -// CHECK9-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 128 -// CHECK9-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 128 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 128 // CHECK9-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 -// CHECK9-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 128 +// CHECK9-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP1]], align 128 -// CHECK9-NEXT: store i32 [[TMP4]], i32* [[T_VAR1]], align 128 -// CHECK9-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK9-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP5]], i8* align 128 [[TMP6]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S.0]* [[TMP2]] to %struct.S.0* -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP4]], align 128 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[T_VAR]], align 128 +// CHECK9-NEXT: [[TMP10:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK9-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP10]], i8* align 128 [[TMP11]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP12:%.*]] = bitcast [2 x %struct.S.1]* [[TMP6]] to %struct.S.1* +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN]], [[TMP13]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP12]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP]]) -// CHECK9-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* [[AGG_TMP]]) +// CHECK9-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.1* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.1* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* [[AGG_TMP]]) // CHECK9-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done4: -// CHECK9-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK9-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S.0* nonnull align 4 dereferenceable(4) [[TMP3]], %struct.St* [[AGG_TMP6]]) -// CHECK9-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR1]], align 128 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 0 -// CHECK9-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX]], align 128 -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i64 0, i64 0 -// CHECK9-NEXT: [[TMP10:%.*]] = bitcast %struct.S.0* [[ARRAYIDX7]] to i8* -// CHECK9-NEXT: [[TMP11:%.*]] = bitcast %struct.S.0* [[VAR5]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP10]], i8* align 128 [[TMP11]], i64 4, i1 false) -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP13]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done1: +// CHECK9-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK9-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.1* nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.1* nonnull align 4 dereferenceable(4) [[TMP8]], %struct.St* [[AGG_TMP2]]) +// CHECK9-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR]], align 128 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 0 +// CHECK9-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 128 +// CHECK9-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP15:%.*]] = bitcast %struct.S.1* [[ARRAYIDX3]] to i8* +// CHECK9-NEXT: [[TMP16:%.*]] = bitcast %struct.S.1* [[VAR]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP15]], i8* align 128 [[TMP16]], i64 4, i1 false) +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN4]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP12]], [[OMP_ARRAYCPY_DONE4]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done9: +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP17]], [[OMP_ARRAYCPY_DONE1]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done5: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC1ERKS0_2St -// CHECK9-SAME: (%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.1* nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC2ERKS0_2St(%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.0* nonnull align 4 dereferenceable(4) [[TMP0]], %struct.St* [[T]]) +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[S]], %struct.S.1** [[S_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.S.1*, %struct.S.1** [[S_ADDR]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC2ERKS0_2St(%struct.S.1* nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.1* nonnull align 4 dereferenceable(4) [[TMP0]], %struct.St* [[T]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK9-SAME: (%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK9-NEXT: ret void // // @@ -928,51 +954,56 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 128 // CHECK9-NEXT: store i32 [[TMP1]], i32* [[T_VAR1]], align 128 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* [[T_VAR1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[T_VAR1]], i32** [[TMP2]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 128 -// CHECK9-NEXT: store i32 [[TMP1]], i32* [[T_VAR1]], align 128 +// CHECK9-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 128 +// CHECK9-NEXT: store i32 [[TMP3]], i32* [[T_VAR]], align 128 // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK9-SAME: (%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load volatile i32, i32* @g, align 128 // CHECK9-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK9-SAME: (%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load volatile i32, i32* @g, align 128 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -981,16 +1012,16 @@ // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC2ERKS0_2St -// CHECK9-SAME: (%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.1* nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 -// CHECK9-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[S]], %struct.S.1** [[S_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.S.1*, %struct.S.1** [[S_ADDR]], align 8 +// CHECK9-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[TMP0]], i32 0, i32 0 // CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[F2]], align 4 // CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.St* [[T]], i32 0, i32 0 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 @@ -1000,11 +1031,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK9-SAME: (%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1167,8 +1198,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 // CHECK11-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 // CHECK11-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 @@ -1177,82 +1207,92 @@ // CHECK11-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[SIVAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP0]], i32 [[TMP4]], [2 x %struct.S]* [[TMP1]], %struct.S* [[TMP2]], i32 [[TMP6]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [2 x i32]* [[TMP0]], [2 x i32]** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[TMP8]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [2 x i32]* nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 [[T_VAR:%.*]], [2 x %struct.S]* nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S* nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 [[SIVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 4 -// CHECK11-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK11-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK11-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[SIVAR]], i32* [[SIVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = bitcast [2 x i32]* [[VEC1]] to i8* -// CHECK11-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP6]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[SIVAR]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK11-NEXT: [[TMP12:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S]* [[TMP6]] to %struct.S* +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK11-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* [[AGG_TMP]]) // CHECK11-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done3: -// CHECK11-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK11-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* nonnull align 4 dereferenceable(4) [[VAR4]], %struct.S* nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* [[AGG_TMP5]]) -// CHECK11-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC1]], i32 0, i32 0 -// CHECK11-NEXT: store i32 [[TMP7]], i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP8:%.*]] = bitcast %struct.S* [[ARRAYIDX6]] to i8* -// CHECK11-NEXT: [[TMP9:%.*]] = bitcast %struct.S* [[VAR4]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 4, i1 false) -// CHECK11-NEXT: store i32 2, i32* [[SIVAR_ADDR]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN7]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done1: +// CHECK11-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK11-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* nonnull align 4 dereferenceable(4) [[VAR]], %struct.S* nonnull align 4 dereferenceable(4) [[TMP8]], %struct.St* [[AGG_TMP2]]) +// CHECK11-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP16:%.*]] = bitcast %struct.S* [[ARRAYIDX3]] to i8* +// CHECK11-NEXT: [[TMP17:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP16]], i8* align 4 [[TMP17]], i32 4, i1 false) +// CHECK11-NEXT: store i32 2, i32* [[SIVAR]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN4]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP10]], [[OMP_ARRAYCPY_DONE3]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP18]], [[OMP_ARRAYCPY_DONE1]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done8: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done5: // CHECK11-NEXT: ret void // // @@ -1303,24 +1343,29 @@ // CHECK11-SAME: (i32 [[T_VAR:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], i32* [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP1]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], i32* [[TMP0]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[T_VAR]], i32* [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[T_VAR]], align 4 // CHECK11-NEXT: ret void // // @@ -1328,26 +1373,26 @@ // CHECK11-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 128 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 128 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1]], align 128 // CHECK11-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x i8*], align 4 // CHECK11-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x i8*], align 4 // CHECK11-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x i8*], align 4 // CHECK11-NEXT: [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x i8*], align 4 // CHECK11-NEXT: [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x i8*], align 4 // CHECK11-NEXT: [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x i8*], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: store i32 0, i32* [[T_VAR]], align 128 // CHECK11-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 128 [[TMP0]], i8* align 128 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i32 8, i1 false) -// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 1) -// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i32 1 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 2) -// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* nonnull align 4 dereferenceable(4) [[VAR]], i32 3) +// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 1) +// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i32 1 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 2) +// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* nonnull align 4 dereferenceable(4) [[VAR]], i32 3) // CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP2:%.*]] = bitcast i8** [[TMP1]] to i32** // CHECK11-NEXT: store i32* [[T_VAR]], i32** [[TMP2]], align 4 @@ -1365,19 +1410,19 @@ // CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 // CHECK11-NEXT: store i8* null, i8** [[TMP10]], align 4 // CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to [2 x %struct.S.0]** -// CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to [2 x %struct.S.1]** +// CHECK11-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP12]], align 4 // CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP14:%.*]] = bitcast i8** [[TMP13]] to [2 x %struct.S.0]** -// CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = bitcast i8** [[TMP13]] to [2 x %struct.S.1]** +// CHECK11-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP14]], align 4 // CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 // CHECK11-NEXT: store i8* null, i8** [[TMP15]], align 4 // CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK11-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to %struct.S.0** -// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to %struct.S.1** +// CHECK11-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[TMP17]], align 4 // CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK11-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to %struct.S.0** -// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[TMP19]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to %struct.S.1** +// CHECK11-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[TMP19]], align 4 // CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 // CHECK11-NEXT: store i8* null, i8** [[TMP20]], align 4 // CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 @@ -1386,7 +1431,7 @@ // CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK11-NEXT: br i1 [[TMP24]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: -// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l75(i32* [[T_VAR]], [2 x i32]* [[VEC]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l75(i32* [[T_VAR]], [2 x i32]* [[VEC]], [2 x %struct.S.1]* [[S_ARR]], %struct.S.1* [[VAR]]) #[[ATTR4]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: // CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 @@ -1407,18 +1452,18 @@ // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT5]] // CHECK11: omp_offload.cont5: // CHECK11-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP34]], [[OMP_OFFLOAD_CONT5]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP34]], [[OMP_OFFLOAD_CONT5]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE6:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK11: arraydestroy.done6: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK11-NEXT: ret i32 [[TMP35]] // @@ -1505,140 +1550,148 @@ // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK11-SAME: (%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK11-SAME: (%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS1]], i32 [[TMP0]]) +// CHECK11-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* nonnull align 4 dereferenceable(4) [[THIS1]], i32 [[TMP0]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l75 -// CHECK11-SAME: (i32* nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.0]* nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x i32]* nonnull align 4 dereferenceable(8) [[VEC:%.*]], [2 x %struct.S.1]* nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.1* nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.1]*, align 4 +// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 +// CHECK11-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[S_ARR_ADDR]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[VAR_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[S_ARR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S.1*, %struct.S.1** [[VAR_ADDR]], align 4 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 128 // CHECK11-NEXT: store i32 [[TMP4]], i32* [[T_VAR1]], align 128 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i32*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), [2 x i32]* [[TMP1]], i32* [[T_VAR1]], [2 x %struct.S.0]* [[TMP2]], %struct.S.0* [[TMP3]]) +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store [2 x i32]* [[TMP1]], [2 x i32]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32* [[T_VAR1]], i32** [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store [2 x %struct.S.1]* [[TMP2]], [2 x %struct.S.1]** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store %struct.S.1* [[TMP3]], %struct.S.1** [[TMP8]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [2 x i32]* nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32* nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], [2 x %struct.S.0]* nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 -// CHECK11-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 128 -// CHECK11-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 128 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 128 // CHECK11-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 -// CHECK11-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 128 +// CHECK11-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP1]], align 128 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[T_VAR1]], align 128 -// CHECK11-NEXT: [[TMP5:%.*]] = bitcast [2 x i32]* [[VEC2]] to i8* -// CHECK11-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 128 [[TMP5]], i8* align 128 [[TMP6]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP7:%.*]] = bitcast [2 x %struct.S.0]* [[TMP2]] to %struct.S.0* -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP8]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP4]], align 128 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[T_VAR]], align 128 +// CHECK11-NEXT: [[TMP10:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK11-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 128 [[TMP10]], i8* align 128 [[TMP11]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP12:%.*]] = bitcast [2 x %struct.S.1]* [[TMP6]] to %struct.S.1* +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN]], [[TMP13]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP12]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP]]) -// CHECK11-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* [[AGG_TMP]]) +// CHECK11-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.1* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.1* nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], %struct.St* [[AGG_TMP]]) // CHECK11-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done4: -// CHECK11-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK11-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.0* nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S.0* nonnull align 4 dereferenceable(4) [[TMP3]], %struct.St* [[AGG_TMP6]]) -// CHECK11-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[T_VAR1]], align 128 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 0 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX]], align 128 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP10:%.*]] = bitcast %struct.S.0* [[ARRAYIDX7]] to i8* -// CHECK11-NEXT: [[TMP11:%.*]] = bitcast %struct.S.0* [[VAR5]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 128 [[TMP10]], i8* align 128 [[TMP11]], i32 4, i1 false) -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN8]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP13]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done1: +// CHECK11-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK11-NEXT: call void @_ZN1SIiEC1ERKS0_2St(%struct.S.1* nonnull align 4 dereferenceable(4) [[VAR]], %struct.S.1* nonnull align 4 dereferenceable(4) [[TMP8]], %struct.St* [[AGG_TMP2]]) +// CHECK11-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR]], align 128 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 128 +// CHECK11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP15:%.*]] = bitcast %struct.S.1* [[ARRAYIDX3]] to i8* +// CHECK11-NEXT: [[TMP16:%.*]] = bitcast %struct.S.1* [[VAR]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 128 [[TMP15]], i8* align 128 [[TMP16]], i32 4, i1 false) +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN4]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP12]], [[OMP_ARRAYCPY_DONE4]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done9: +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP17]], [[OMP_ARRAYCPY_DONE1]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done5: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC1ERKS0_2St -// CHECK11-SAME: (%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.1* nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC2ERKS0_2St(%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.0* nonnull align 4 dereferenceable(4) [[TMP0]], %struct.St* [[T]]) +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[S]], %struct.S.1** [[S_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.S.1*, %struct.S.1** [[S_ADDR]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC2ERKS0_2St(%struct.S.1* nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.1* nonnull align 4 dereferenceable(4) [[TMP0]], %struct.St* [[T]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK11-SAME: (%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK11-NEXT: ret void // // @@ -1647,51 +1700,56 @@ // CHECK11-NEXT: entry: // CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 128 // CHECK11-NEXT: store i32 [[TMP1]], i32* [[T_VAR1]], align 128 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* [[T_VAR1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32* [[T_VAR1]], i32** [[TMP2]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 128 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[T_VAR1]], align 128 +// CHECK11-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 128 +// CHECK11-NEXT: store i32 [[TMP3]], i32* [[T_VAR]], align 128 // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK11-SAME: (%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load volatile i32, i32* @g, align 128 // CHECK11-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK11-SAME: (%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load volatile i32, i32* @g, align 128 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1700,16 +1758,16 @@ // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC2ERKS0_2St -// CHECK11-SAME: (%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.1* nonnull align 4 dereferenceable(4) [[S:%.*]], %struct.St* [[T:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 4 -// CHECK11-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[S]], %struct.S.1** [[S_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.S.1*, %struct.S.1** [[S_ADDR]], align 4 +// CHECK11-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[TMP0]], i32 0, i32 0 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[F2]], align 4 // CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.St* [[T]], i32 0, i32 0 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 @@ -1719,11 +1777,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK11-SAME: (%struct.S.0* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -1868,6 +1926,7 @@ // CHECK17-NEXT: [[VLA_ADDR5:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA2_ADDR:%.*]] = alloca double*, align 8 // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK17-NEXT: store %struct.St* [[S]], %struct.St** [[S_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 @@ -1881,63 +1940,74 @@ // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR5]], align 8 // CHECK17-NEXT: [[TMP3:%.*]] = load double*, double** [[VLA2_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK17-NEXT: [[TMP4:%.*]] = load %struct.St*, %struct.St** [[S_ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load ppc_fp128*, ppc_fp128** [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load float*, float** [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 8, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.St*, i32*, i64, ppc_fp128*, float*, i64, i64, double*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.St* [[TMP4]], i32* [[CONV]], i64 [[TMP0]], ppc_fp128* [[TMP5]], float* [[TMP6]], i64 [[TMP1]], i64 [[TMP2]], double* [[TMP3]]) +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP5:%.*]] = load %struct.St*, %struct.St** [[S_ADDR]], align 8 +// CHECK17-NEXT: store %struct.St* [[TMP5]], %struct.St** [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i32* [[CONV]], i32** [[TMP6]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i64 [[TMP0]], i64* [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP9:%.*]] = load ppc_fp128*, ppc_fp128** [[VLA1_ADDR]], align 8 +// CHECK17-NEXT: store ppc_fp128* [[TMP9]], ppc_fp128** [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load float*, float** [[A_ADDR]], align 8 +// CHECK17-NEXT: store float* [[TMP11]], float** [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: store i64 [[TMP1]], i64* [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK17-NEXT: store i64 [[TMP2]], i64* [[TMP13]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK17-NEXT: store double* [[TMP3]], double** [[TMP14]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.St* [[S:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], i64 [[VLA:%.*]], ppc_fp128* [[VLA1:%.*]], float* [[A:%.*]], i64 [[VLA2:%.*]], i64 [[VLA4:%.*]], double* nonnull align 8 dereferenceable(8) [[VLA26:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[S_ADDR:%.*]] = alloca %struct.St*, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA1_ADDR:%.*]] = alloca ppc_fp128*, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 -// CHECK17-NEXT: [[VLA_ADDR3:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR5:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA2_ADDR:%.*]] = alloca double*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK17-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 // CHECK17-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store %struct.St* [[S]], %struct.St** [[S_ADDR]], align 8 -// CHECK17-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ppc_fp128* [[VLA1]], ppc_fp128** [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA2]], i64* [[VLA_ADDR3]], align 8 -// CHECK17-NEXT: store i64 [[VLA4]], i64* [[VLA_ADDR5]], align 8 -// CHECK17-NEXT: store double* [[VLA26]], double** [[VLA2_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR3]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[VLA_ADDR5]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load double*, double** [[VLA2_ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = call i8* @llvm.stacksave() -// CHECK17-NEXT: store i8* [[TMP5]], i8** [[SAVED_STACK]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP2]], [[TMP3]] -// CHECK17-NEXT: [[VLA7:%.*]] = alloca double, i64 [[TMP6]], align 128 -// CHECK17-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8 -// CHECK17-NEXT: store i64 [[TMP3]], i64* [[__VLA_EXPR1]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP2]], [[TMP3]] -// CHECK17-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 8 -// CHECK17-NEXT: [[TMP9:%.*]] = bitcast double* [[VLA7]] to i8* -// CHECK17-NEXT: [[TMP10:%.*]] = bitcast double* [[TMP4]] to i8* -// CHECK17-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP9]], i8* align 128 [[TMP10]], i64 [[TMP8]], i1 false) -// CHECK17-NEXT: [[TMP11:%.*]] = load %struct.St*, %struct.St** [[S_ADDR]], align 8 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.St* [[TMP11]], i64 0 -// CHECK17-NEXT: [[TMP12:%.*]] = load %struct.St*, %struct.St** [[S_ADDR]], align 8 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load ppc_fp128*, ppc_fp128** [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: call void @_ZN2St7St_funcEPS_iPg(%struct.St* nonnull align 4 dereferenceable(8) [[ARRAYIDX]], %struct.St* [[TMP12]], i32 signext [[TMP13]], ppc_fp128* [[TMP14]]) -// CHECK17-NEXT: [[TMP15:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK17-NEXT: call void @llvm.stackrestore(i8* [[TMP15]]) +// CHECK17-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, i64* [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 7 +// CHECK17-NEXT: [[TMP13:%.*]] = load double*, double** [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = call i8* @llvm.stacksave() +// CHECK17-NEXT: store i8* [[TMP14]], i8** [[SAVED_STACK]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP9]], [[TMP11]] +// CHECK17-NEXT: [[VLA:%.*]] = alloca double, i64 [[TMP15]], align 128 +// CHECK17-NEXT: store i64 [[TMP9]], i64* [[__VLA_EXPR0]], align 8 +// CHECK17-NEXT: store i64 [[TMP11]], i64* [[__VLA_EXPR1]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP9]], [[TMP11]] +// CHECK17-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 8 +// CHECK17-NEXT: [[TMP18:%.*]] = bitcast double* [[VLA]] to i8* +// CHECK17-NEXT: [[TMP19:%.*]] = bitcast double* [[TMP13]] to i8* +// CHECK17-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP18]], i8* align 128 [[TMP19]], i64 [[TMP17]], i1 false) +// CHECK17-NEXT: [[TMP20:%.*]] = load %struct.St*, %struct.St** [[TMP1]], align 8 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.St* [[TMP20]], i64 0 +// CHECK17-NEXT: [[TMP21:%.*]] = load %struct.St*, %struct.St** [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load ppc_fp128*, ppc_fp128** [[TMP6]], align 8 +// CHECK17-NEXT: call void @_ZN2St7St_funcEPS_iPg(%struct.St* nonnull align 4 dereferenceable(8) [[ARRAYIDX]], %struct.St* [[TMP21]], i32 signext [[TMP22]], ppc_fp128* [[TMP23]]) +// CHECK17-NEXT: [[TMP24:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK17-NEXT: call void @llvm.stackrestore(i8* [[TMP24]]) // CHECK17-NEXT: ret void // // @@ -2106,6 +2176,7 @@ // CHECK17-NEXT: [[VLA2_ADDR:%.*]] = alloca double*, align 8 // CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.St*, align 8 // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store %struct.St* [[S]], %struct.St** [[S_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: store ppc_fp128* [[VLA1]], ppc_fp128** [[VLA1_ADDR]], align 8 @@ -2120,76 +2191,87 @@ // CHECK17-NEXT: [[TMP3:%.*]] = load double*, double** [[VLA2_ADDR]], align 8 // CHECK17-NEXT: [[TMP4:%.*]] = load %struct.St*, %struct.St** [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK17-NEXT: [[TMP5:%.*]] = load ppc_fp128*, ppc_fp128** [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load %struct.St*, %struct.St** [[S_ADDR]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 8, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, ppc_fp128*, %struct.St*, i64, i64, double*, i32*, %struct.St*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP0]], ppc_fp128* [[TMP5]], %struct.St* [[TMP4]], i64 [[TMP1]], i64 [[TMP2]], double* [[TMP3]], i32* [[CONV]], %struct.St* [[TMP6]]) +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i64 [[TMP0]], i64* [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP7:%.*]] = load ppc_fp128*, ppc_fp128** [[VLA1_ADDR]], align 8 +// CHECK17-NEXT: store ppc_fp128* [[TMP7]], ppc_fp128** [[TMP6]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store %struct.St* [[TMP4]], %struct.St** [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP1]], i64* [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i64 [[TMP2]], i64* [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: store double* [[TMP3]], double** [[TMP11]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK17-NEXT: store i32* [[CONV]], i32** [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK17-NEXT: [[TMP14:%.*]] = load %struct.St*, %struct.St** [[S_ADDR]], align 8 +// CHECK17-NEXT: store %struct.St* [[TMP14]], %struct.St** [[TMP13]], align 8 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[VLA:%.*]], ppc_fp128* [[VLA1:%.*]], %struct.St* [[THIS:%.*]], i64 [[VLA2:%.*]], i64 [[VLA4:%.*]], double* nonnull align 8 dereferenceable(8) [[VLA26:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], %struct.St* [[S:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA1_ADDR:%.*]] = alloca ppc_fp128*, align 8 -// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.St*, align 8 -// CHECK17-NEXT: [[VLA_ADDR3:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR5:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA2_ADDR:%.*]] = alloca double*, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 8 -// CHECK17-NEXT: [[S_ADDR:%.*]] = alloca %struct.St*, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK17-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 // CHECK17-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ppc_fp128* [[VLA1]], ppc_fp128** [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: store %struct.St* [[THIS]], %struct.St** [[THIS_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA2]], i64* [[VLA_ADDR3]], align 8 -// CHECK17-NEXT: store i64 [[VLA4]], i64* [[VLA_ADDR5]], align 8 -// CHECK17-NEXT: store double* [[VLA26]], double** [[VLA2_ADDR]], align 8 -// CHECK17-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 8 -// CHECK17-NEXT: store %struct.St* [[S]], %struct.St** [[S_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load %struct.St*, %struct.St** [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR3]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[VLA_ADDR5]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load double*, double** [[VLA2_ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32*, i32** [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = call i8* @llvm.stacksave() -// CHECK17-NEXT: store i8* [[TMP6]], i8** [[SAVED_STACK]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP2]], [[TMP3]] -// CHECK17-NEXT: [[VLA7:%.*]] = alloca double, i64 [[TMP7]], align 128 -// CHECK17-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8 -// CHECK17-NEXT: store i64 [[TMP3]], i64* [[__VLA_EXPR1]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP2]], [[TMP3]] -// CHECK17-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 8 -// CHECK17-NEXT: [[TMP10:%.*]] = bitcast double* [[VLA7]] to i8* -// CHECK17-NEXT: [[TMP11:%.*]] = bitcast double* [[TMP4]] to i8* -// CHECK17-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP10]], i8* align 128 [[TMP11]], i64 [[TMP9]], i1 false) -// CHECK17-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.St* [[TMP1]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[B]], align 4 -// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST]], %struct.St* [[TMP1]], i32 0, i32 0 -// CHECK17-NEXT: store i32 [[TMP12]], i32* [[A]], align 4 -// CHECK17-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double -// CHECK17-NEXT: [[TMP13:%.*]] = mul nsw i64 1, [[TMP3]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[VLA7]], i64 [[TMP13]] -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 1 +// CHECK17-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP5:%.*]] = load %struct.St*, %struct.St** [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, i64* [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP11:%.*]] = load double*, double** [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 7 +// CHECK17-NEXT: [[TMP15:%.*]] = call i8* @llvm.stacksave() +// CHECK17-NEXT: store i8* [[TMP15]], i8** [[SAVED_STACK]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP7]], [[TMP9]] +// CHECK17-NEXT: [[VLA:%.*]] = alloca double, i64 [[TMP16]], align 128 +// CHECK17-NEXT: store i64 [[TMP7]], i64* [[__VLA_EXPR0]], align 8 +// CHECK17-NEXT: store i64 [[TMP9]], i64* [[__VLA_EXPR1]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP7]], [[TMP9]] +// CHECK17-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8 +// CHECK17-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA]] to i8* +// CHECK17-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP11]] to i8* +// CHECK17-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP19]], i8* align 128 [[TMP20]], i64 [[TMP18]], i1 false) +// CHECK17-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.St* [[TMP5]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[B]], align 4 +// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST]], %struct.St* [[TMP5]], i32 0, i32 0 +// CHECK17-NEXT: store i32 [[TMP21]], i32* [[A]], align 4 +// CHECK17-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP21]] to double +// CHECK17-NEXT: [[TMP22:%.*]] = mul nsw i64 1, [[TMP9]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[VLA]], i64 [[TMP22]] +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP23]], 1 // CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[SUB]] to i64 -// CHECK17-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX]], i64 [[IDXPROM]] -// CHECK17-NEXT: store double [[CONV]], double* [[ARRAYIDX8]], align 8 -// CHECK17-NEXT: [[CONV9:%.*]] = fpext double [[CONV]] to ppc_fp128 -// CHECK17-NEXT: [[TMP15:%.*]] = load ppc_fp128*, ppc_fp128** [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: [[B10:%.*]] = getelementptr inbounds [[STRUCT_ST]], %struct.St* [[TMP1]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[B10]], align 4 -// CHECK17-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds ppc_fp128, ppc_fp128* [[TMP15]], i64 [[IDXPROM11]] -// CHECK17-NEXT: store ppc_fp128 [[CONV9]], ppc_fp128* [[ARRAYIDX12]], align 16 -// CHECK17-NEXT: [[TMP17:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 -// CHECK17-NEXT: call void @llvm.stackrestore(i8* [[TMP17]]) +// CHECK17-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX]], i64 [[IDXPROM]] +// CHECK17-NEXT: store double [[CONV]], double* [[ARRAYIDX1]], align 8 +// CHECK17-NEXT: [[CONV2:%.*]] = fpext double [[CONV]] to ppc_fp128 +// CHECK17-NEXT: [[TMP24:%.*]] = load ppc_fp128*, ppc_fp128** [[TMP3]], align 8 +// CHECK17-NEXT: [[B3:%.*]] = getelementptr inbounds [[STRUCT_ST]], %struct.St* [[TMP5]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[B3]], align 4 +// CHECK17-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK17-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds ppc_fp128, ppc_fp128* [[TMP24]], i64 [[IDXPROM4]] +// CHECK17-NEXT: store ppc_fp128 [[CONV2]], ppc_fp128* [[ARRAYIDX5]], align 16 +// CHECK17-NEXT: [[TMP26:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK17-NEXT: call void @llvm.stackrestore(i8* [[TMP26]]) // CHECK17-NEXT: ret void // // @@ -2331,6 +2413,7 @@ // CHECK19-NEXT: [[VLA_ADDR5:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA2_ADDR:%.*]] = alloca double*, align 4 // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: store float* [[A]], float** [[A_ADDR]], align 4 // CHECK19-NEXT: store %struct.St* [[S]], %struct.St** [[S_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 @@ -2343,63 +2426,74 @@ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR3]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR5]], align 4 // CHECK19-NEXT: [[TMP3:%.*]] = load double*, double** [[VLA2_ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load %struct.St*, %struct.St** [[S_ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load x86_fp80*, x86_fp80** [[VLA1_ADDR]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load float*, float** [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 8, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.St*, i32*, i32, x86_fp80*, float*, i32, i32, double*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.St* [[TMP4]], i32* [[N_ADDR]], i32 [[TMP0]], x86_fp80* [[TMP5]], float* [[TMP6]], i32 [[TMP1]], i32 [[TMP2]], double* [[TMP3]]) +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP5:%.*]] = load %struct.St*, %struct.St** [[S_ADDR]], align 4 +// CHECK19-NEXT: store %struct.St* [[TMP5]], %struct.St** [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32* [[N_ADDR]], i32** [[TMP6]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32 [[TMP0]], i32* [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP9:%.*]] = load x86_fp80*, x86_fp80** [[VLA1_ADDR]], align 4 +// CHECK19-NEXT: store x86_fp80* [[TMP9]], x86_fp80** [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load float*, float** [[A_ADDR]], align 4 +// CHECK19-NEXT: store float* [[TMP11]], float** [[TMP10]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: store i32 [[TMP1]], i32* [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[TMP13]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK19-NEXT: store double* [[TMP3]], double** [[TMP14]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK19-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.St* [[S:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], i32 [[VLA:%.*]], x86_fp80* [[VLA1:%.*]], float* [[A:%.*]], i32 [[VLA2:%.*]], i32 [[VLA4:%.*]], double* nonnull align 4 dereferenceable(8) [[VLA26:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[S_ADDR:%.*]] = alloca %struct.St*, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA1_ADDR:%.*]] = alloca x86_fp80*, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca float*, align 4 -// CHECK19-NEXT: [[VLA_ADDR3:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR5:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA2_ADDR:%.*]] = alloca double*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK19-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 // CHECK19-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store %struct.St* [[S]], %struct.St** [[S_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store x86_fp80* [[VLA1]], x86_fp80** [[VLA1_ADDR]], align 4 -// CHECK19-NEXT: store float* [[A]], float** [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA2]], i32* [[VLA_ADDR3]], align 4 -// CHECK19-NEXT: store i32 [[VLA4]], i32* [[VLA_ADDR5]], align 4 -// CHECK19-NEXT: store double* [[VLA26]], double** [[VLA2_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR3]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[VLA_ADDR5]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load double*, double** [[VLA2_ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = call i8* @llvm.stacksave() -// CHECK19-NEXT: store i8* [[TMP5]], i8** [[SAVED_STACK]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP2]], [[TMP3]] -// CHECK19-NEXT: [[VLA7:%.*]] = alloca double, i32 [[TMP6]], align 128 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], i32* [[__VLA_EXPR1]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP2]], [[TMP3]] -// CHECK19-NEXT: [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 8 -// CHECK19-NEXT: [[TMP9:%.*]] = bitcast double* [[VLA7]] to i8* -// CHECK19-NEXT: [[TMP10:%.*]] = bitcast double* [[TMP4]] to i8* -// CHECK19-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 128 [[TMP9]], i8* align 128 [[TMP10]], i32 [[TMP8]], i1 false) -// CHECK19-NEXT: [[TMP11:%.*]] = load %struct.St*, %struct.St** [[S_ADDR]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.St* [[TMP11]], i32 0 -// CHECK19-NEXT: [[TMP12:%.*]] = load %struct.St*, %struct.St** [[S_ADDR]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load x86_fp80*, x86_fp80** [[VLA1_ADDR]], align 4 -// CHECK19-NEXT: call void @_ZN2St7St_funcEPS_iPe(%struct.St* nonnull align 4 dereferenceable(8) [[ARRAYIDX]], %struct.St* [[TMP12]], i32 [[TMP13]], x86_fp80* [[TMP14]]) -// CHECK19-NEXT: [[TMP15:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 -// CHECK19-NEXT: call void @llvm.stackrestore(i8* [[TMP15]]) +// CHECK19-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 6 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 7 +// CHECK19-NEXT: [[TMP13:%.*]] = load double*, double** [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = call i8* @llvm.stacksave() +// CHECK19-NEXT: store i8* [[TMP14]], i8** [[SAVED_STACK]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = mul nuw i32 [[TMP9]], [[TMP11]] +// CHECK19-NEXT: [[VLA:%.*]] = alloca double, i32 [[TMP15]], align 128 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[__VLA_EXPR0]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], i32* [[__VLA_EXPR1]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP9]], [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP16]], 8 +// CHECK19-NEXT: [[TMP18:%.*]] = bitcast double* [[VLA]] to i8* +// CHECK19-NEXT: [[TMP19:%.*]] = bitcast double* [[TMP13]] to i8* +// CHECK19-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 128 [[TMP18]], i8* align 128 [[TMP19]], i32 [[TMP17]], i1 false) +// CHECK19-NEXT: [[TMP20:%.*]] = load %struct.St*, %struct.St** [[TMP1]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.St* [[TMP20]], i32 0 +// CHECK19-NEXT: [[TMP21:%.*]] = load %struct.St*, %struct.St** [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load x86_fp80*, x86_fp80** [[TMP6]], align 4 +// CHECK19-NEXT: call void @_ZN2St7St_funcEPS_iPe(%struct.St* nonnull align 4 dereferenceable(8) [[ARRAYIDX]], %struct.St* [[TMP21]], i32 [[TMP22]], x86_fp80* [[TMP23]]) +// CHECK19-NEXT: [[TMP24:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// CHECK19-NEXT: call void @llvm.stackrestore(i8* [[TMP24]]) // CHECK19-NEXT: ret void // // @@ -2565,6 +2659,7 @@ // CHECK19-NEXT: [[VLA2_ADDR:%.*]] = alloca double*, align 4 // CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.St*, align 4 // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: store %struct.St* [[S]], %struct.St** [[S_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 // CHECK19-NEXT: store x86_fp80* [[VLA1]], x86_fp80** [[VLA1_ADDR]], align 4 @@ -2578,74 +2673,85 @@ // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR5]], align 4 // CHECK19-NEXT: [[TMP3:%.*]] = load double*, double** [[VLA2_ADDR]], align 4 // CHECK19-NEXT: [[TMP4:%.*]] = load %struct.St*, %struct.St** [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load x86_fp80*, x86_fp80** [[VLA1_ADDR]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load %struct.St*, %struct.St** [[S_ADDR]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 8, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, x86_fp80*, %struct.St*, i32, i32, double*, i32*, %struct.St*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP0]], x86_fp80* [[TMP5]], %struct.St* [[TMP4]], i32 [[TMP1]], i32 [[TMP2]], double* [[TMP3]], i32* [[N_ADDR]], %struct.St* [[TMP6]]) +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32 [[TMP0]], i32* [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP7:%.*]] = load x86_fp80*, x86_fp80** [[VLA1_ADDR]], align 4 +// CHECK19-NEXT: store x86_fp80* [[TMP7]], x86_fp80** [[TMP6]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store %struct.St* [[TMP4]], %struct.St** [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP1]], i32* [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[TMP10]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: store double* [[TMP3]], double** [[TMP11]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK19-NEXT: store i32* [[N_ADDR]], i32** [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK19-NEXT: [[TMP14:%.*]] = load %struct.St*, %struct.St** [[S_ADDR]], align 4 +// CHECK19-NEXT: store %struct.St* [[TMP14]], %struct.St** [[TMP13]], align 4 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[VLA:%.*]], x86_fp80* [[VLA1:%.*]], %struct.St* [[THIS:%.*]], i32 [[VLA2:%.*]], i32 [[VLA4:%.*]], double* nonnull align 4 dereferenceable(8) [[VLA26:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], %struct.St* [[S:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA1_ADDR:%.*]] = alloca x86_fp80*, align 4 -// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.St*, align 4 -// CHECK19-NEXT: [[VLA_ADDR3:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR5:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA2_ADDR:%.*]] = alloca double*, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32*, align 4 -// CHECK19-NEXT: [[S_ADDR:%.*]] = alloca %struct.St*, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK19-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 // CHECK19-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store x86_fp80* [[VLA1]], x86_fp80** [[VLA1_ADDR]], align 4 -// CHECK19-NEXT: store %struct.St* [[THIS]], %struct.St** [[THIS_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA2]], i32* [[VLA_ADDR3]], align 4 -// CHECK19-NEXT: store i32 [[VLA4]], i32* [[VLA_ADDR5]], align 4 -// CHECK19-NEXT: store double* [[VLA26]], double** [[VLA2_ADDR]], align 4 -// CHECK19-NEXT: store i32* [[N]], i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: store %struct.St* [[S]], %struct.St** [[S_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load %struct.St*, %struct.St** [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR3]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[VLA_ADDR5]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load double*, double** [[VLA2_ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32*, i32** [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = call i8* @llvm.stacksave() -// CHECK19-NEXT: store i8* [[TMP6]], i8** [[SAVED_STACK]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP2]], [[TMP3]] -// CHECK19-NEXT: [[VLA7:%.*]] = alloca double, i32 [[TMP7]], align 128 -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], i32* [[__VLA_EXPR1]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = mul nuw i32 [[TMP2]], [[TMP3]] -// CHECK19-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 8 -// CHECK19-NEXT: [[TMP10:%.*]] = bitcast double* [[VLA7]] to i8* -// CHECK19-NEXT: [[TMP11:%.*]] = bitcast double* [[TMP4]] to i8* -// CHECK19-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 128 [[TMP10]], i8* align 128 [[TMP11]], i32 [[TMP9]], i1 false) -// CHECK19-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.St* [[TMP1]], i32 0, i32 1 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[B]], align 4 -// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST]], %struct.St* [[TMP1]], i32 0, i32 0 -// CHECK19-NEXT: store i32 [[TMP12]], i32* [[A]], align 4 -// CHECK19-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double -// CHECK19-NEXT: [[TMP13:%.*]] = mul nsw i32 1, [[TMP3]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[VLA7]], i32 [[TMP13]] -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 1 -// CHECK19-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX]], i32 [[SUB]] -// CHECK19-NEXT: store double [[CONV]], double* [[ARRAYIDX8]], align 8 -// CHECK19-NEXT: [[CONV9:%.*]] = fpext double [[CONV]] to x86_fp80 -// CHECK19-NEXT: [[TMP15:%.*]] = load x86_fp80*, x86_fp80** [[VLA1_ADDR]], align 4 -// CHECK19-NEXT: [[B10:%.*]] = getelementptr inbounds [[STRUCT_ST]], %struct.St* [[TMP1]], i32 0, i32 1 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[B10]], align 4 -// CHECK19-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds x86_fp80, x86_fp80* [[TMP15]], i32 [[TMP16]] -// CHECK19-NEXT: store x86_fp80 [[CONV9]], x86_fp80* [[ARRAYIDX11]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 -// CHECK19-NEXT: call void @llvm.stackrestore(i8* [[TMP17]]) +// CHECK19-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP5:%.*]] = load %struct.St*, %struct.St** [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP11:%.*]] = load double*, double** [[TMP10]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 6 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 7 +// CHECK19-NEXT: [[TMP15:%.*]] = call i8* @llvm.stacksave() +// CHECK19-NEXT: store i8* [[TMP15]], i8** [[SAVED_STACK]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP7]], [[TMP9]] +// CHECK19-NEXT: [[VLA:%.*]] = alloca double, i32 [[TMP16]], align 128 +// CHECK19-NEXT: store i32 [[TMP7]], i32* [[__VLA_EXPR0]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], i32* [[__VLA_EXPR1]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP7]], [[TMP9]] +// CHECK19-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8 +// CHECK19-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA]] to i8* +// CHECK19-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP11]] to i8* +// CHECK19-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 128 [[TMP19]], i8* align 128 [[TMP20]], i32 [[TMP18]], i1 false) +// CHECK19-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.St* [[TMP5]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[B]], align 4 +// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST]], %struct.St* [[TMP5]], i32 0, i32 0 +// CHECK19-NEXT: store i32 [[TMP21]], i32* [[A]], align 4 +// CHECK19-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP21]] to double +// CHECK19-NEXT: [[TMP22:%.*]] = mul nsw i32 1, [[TMP9]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[VLA]], i32 [[TMP22]] +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP23]], 1 +// CHECK19-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX]], i32 [[SUB]] +// CHECK19-NEXT: store double [[CONV]], double* [[ARRAYIDX1]], align 8 +// CHECK19-NEXT: [[CONV2:%.*]] = fpext double [[CONV]] to x86_fp80 +// CHECK19-NEXT: [[TMP24:%.*]] = load x86_fp80*, x86_fp80** [[TMP3]], align 4 +// CHECK19-NEXT: [[B3:%.*]] = getelementptr inbounds [[STRUCT_ST]], %struct.St* [[TMP5]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[B3]], align 4 +// CHECK19-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds x86_fp80, x86_fp80* [[TMP24]], i32 [[TMP25]] +// CHECK19-NEXT: store x86_fp80 [[CONV2]], x86_fp80* [[ARRAYIDX4]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// CHECK19-NEXT: call void @llvm.stackrestore(i8* [[TMP26]]) // CHECK19-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_private_codegen.cpp b/clang/test/OpenMP/teams_private_codegen.cpp --- a/clang/test/OpenMP/teams_private_codegen.cpp +++ b/clang/test/OpenMP/teams_private_codegen.cpp @@ -233,18 +233,21 @@ // CHECK1-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -253,20 +256,22 @@ // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK1-NEXT: store i32* [[A]], i32** [[TMP]], align 8 // CHECK1-NEXT: store i32* [[C]], i32** [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK1-NEXT: store i32* [[TMP3]], i32** [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store i32* [[B]], i32** [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[_TMP1]], align 8 -// CHECK1-NEXT: store i32* [[TMP6]], i32** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store i32* [[B]], i32** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[_TMP1]], align 8 +// CHECK1-NEXT: store i32* [[TMP8]], i32** [[TMP7]], align 8 // CHECK1-NEXT: call void @_ZZN2SSC1ERiENKUlvE_clEv(%class.anon.0* noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: ret void // @@ -300,27 +305,31 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l117 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[G:%.*]] = alloca i32, align 128 // CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 1, i32* [[G]], align 128 // CHECK1-NEXT: store i32 2, i32* [[SIVAR]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store i32* [[G]], i32** [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[SIVAR]], i32** [[TMP1]], align 8 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store i32* [[G]], i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store i32* [[SIVAR]], i32** [[TMP2]], align 8 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.2* noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK1-NEXT: ret void // // @@ -401,18 +410,21 @@ // CHECK3-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -421,20 +433,22 @@ // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK3-NEXT: store i32* [[A]], i32** [[TMP]], align 4 // CHECK3-NEXT: store i32* [[C]], i32** [[_TMP1]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP]], align 4 -// CHECK3-NEXT: store i32* [[TMP3]], i32** [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store i32* [[B]], i32** [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[_TMP1]], align 4 -// CHECK3-NEXT: store i32* [[TMP6]], i32** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store %struct.SS* [[TMP2]], %struct.SS** [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP]], align 4 +// CHECK3-NEXT: store i32* [[TMP5]], i32** [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store i32* [[B]], i32** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[_TMP1]], align 4 +// CHECK3-NEXT: store i32* [[TMP8]], i32** [[TMP7]], align 4 // CHECK3-NEXT: call void @_ZZN2SSC1ERiENKUlvE_clEv(%class.anon.0* noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: ret void // @@ -468,27 +482,31 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l117 // CHECK3-SAME: () #[[ATTR3]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK3-NEXT: [[G:%.*]] = alloca i32, align 128 // CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store i32 1, i32* [[G]], align 128 // CHECK3-NEXT: store i32 2, i32* [[SIVAR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store i32* [[G]], i32** [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[TMP1]], align 4 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[G]], i32** [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store i32* [[SIVAR]], i32** [[TMP2]], align 4 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.2* noundef nonnull align 4 dereferenceable(8) [[REF_TMP]]) // CHECK3-NEXT: ret void // // @@ -584,15 +602,17 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l136 // CHECK9-SAME: () #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 @@ -600,6 +620,8 @@ // CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] @@ -611,20 +633,20 @@ // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: // CHECK9-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[T_VAR]], align 4 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 0 -// CHECK9-NEXT: store i32 [[TMP0]], i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 0 -// CHECK9-NEXT: [[TMP1:%.*]] = bitcast %struct.S* [[ARRAYIDX1]] to i8* -// CHECK9-NEXT: [[TMP2:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP2:%.*]] = bitcast %struct.S* [[ARRAYIDX1]] to i8* +// CHECK9-NEXT: [[TMP3:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP2]], i8* align 4 [[TMP3]], i64 4, i1 false) // CHECK9-NEXT: store i32 3, i32* [[SIVAR]], align 4 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i64 2 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP3]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP4]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -647,22 +669,22 @@ // CHECK9-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[SST:%.*]] = alloca [[STRUCT_SST:%.*]], align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 128 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 128 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1]], align 128 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: call void @_ZN3SSTIiEC1Ev(%struct.SST* noundef nonnull align 4 dereferenceable(4) [[SST]]) // CHECK9-NEXT: store i32 0, i32* [[T_VAR]], align 128 // CHECK9-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP0]], i8* align 128 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) -// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) -// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef signext 3) +// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) +// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) +// CHECK9-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef signext 3) // CHECK9-NEXT: [[TMP1:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l86.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0) // CHECK9-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK9-NEXT: br i1 [[TMP2]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] @@ -671,18 +693,18 @@ // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: // CHECK9-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK9: arraydestroy.done1: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK9-NEXT: ret i32 [[TMP4]] // @@ -732,18 +754,21 @@ // CHECK9-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -751,21 +776,23 @@ // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[TMP]], align 8 // CHECK9-NEXT: store i32* [[C]], i32** [[_TMP1]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK9-NEXT: store i32 [[INC]], i32* [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[B]], align 4 -// CHECK9-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP3]], -1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK9-NEXT: store i32 [[INC]], i32* [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4 +// CHECK9-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP5]], -1 // CHECK9-NEXT: store i32 [[DEC]], i32* [[B]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32*, i32** [[_TMP1]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP5]], 1 -// CHECK9-NEXT: store i32 [[DIV]], i32* [[TMP4]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32*, i32** [[_TMP1]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP7]], 1 +// CHECK9-NEXT: store i32 [[DIV]], i32* [[TMP6]], align 4 // CHECK9-NEXT: ret void // // @@ -804,12 +831,12 @@ // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK9-NEXT: ret void // // @@ -824,85 +851,89 @@ // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK9-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) +// CHECK9-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l86 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 128 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 128 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: -// CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[T_VAR]], align 128 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[T_VAR]], align 128 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 0 -// CHECK9-NEXT: store i32 [[TMP0]], i32* [[ARRAYIDX]], align 128 -// CHECK9-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK9-NEXT: [[TMP1:%.*]] = bitcast %struct.S.0* [[ARRAYIDX1]] to i8* -// CHECK9-NEXT: [[TMP2:%.*]] = bitcast %struct.S.0* [[VAR]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP1]], i8* align 128 [[TMP2]], i64 4, i1 false) -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN2]], i64 2 +// CHECK9-NEXT: store i32 [[TMP1]], i32* [[ARRAYIDX]], align 128 +// CHECK9-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP2:%.*]] = bitcast %struct.S.1* [[ARRAYIDX1]] to i8* +// CHECK9-NEXT: [[TMP3:%.*]] = bitcast %struct.S.1* [[VAR]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP2]], i8* align 128 [[TMP3]], i64 4, i1 false) +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN2]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP3]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP4]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK9: arraydestroy.done3: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: store i32 0, i32* [[F]], align 4 // CHECK9-NEXT: ret void // @@ -942,52 +973,57 @@ // CHECK9-SAME: (%struct.SST* noundef [[THIS:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SST*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.SST* [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store %struct.SST* [[TMP0]], %struct.SST** [[TMP1]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SST* noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32*, align 8 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load %struct.SST*, %struct.SST** [[TMP1]], align 8 // CHECK9-NEXT: store i32* [[A]], i32** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK9-NEXT: store i32 [[INC]], i32* [[TMP1]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK9-NEXT: store i32 [[INC]], i32* [[TMP3]], align 4 // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK9-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK9-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK9-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1083,15 +1119,17 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l136 // CHECK11-SAME: () #[[ATTR3:[0-9]+]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 @@ -1099,6 +1137,8 @@ // CHECK11-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] @@ -1110,20 +1150,20 @@ // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: // CHECK11-NEXT: call void @_ZN1SIfEC1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[T_VAR]], align 4 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 0 -// CHECK11-NEXT: store i32 [[TMP0]], i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP1:%.*]] = bitcast %struct.S* [[ARRAYIDX1]] to i8* -// CHECK11-NEXT: [[TMP2:%.*]] = bitcast %struct.S* [[VAR]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP2:%.*]] = bitcast %struct.S* [[ARRAYIDX1]] to i8* +// CHECK11-NEXT: [[TMP3:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP2]], i8* align 4 [[TMP3]], i32 4, i1 false) // CHECK11-NEXT: store i32 3, i32* [[SIVAR]], align 4 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i32 2 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN2]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP3]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP4]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -1146,22 +1186,22 @@ // CHECK11-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[SST:%.*]] = alloca [[STRUCT_SST:%.*]], align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 128 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 128 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1]], align 128 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: call void @_ZN3SSTIiEC1Ev(%struct.SST* noundef nonnull align 4 dereferenceable(4) [[SST]]) // CHECK11-NEXT: store i32 0, i32* [[T_VAR]], align 128 // CHECK11-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 128 [[TMP0]], i8* align 128 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i32 8, i1 false) -// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i32 1 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) -// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) +// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i32 1 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) // CHECK11-NEXT: [[TMP1:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l86.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0) // CHECK11-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK11-NEXT: br i1 [[TMP2]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] @@ -1170,18 +1210,18 @@ // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: // CHECK11-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP3]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK11: arraydestroy.done1: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4 // CHECK11-NEXT: ret i32 [[TMP4]] // @@ -1231,18 +1271,21 @@ // CHECK11-SAME: (%struct.SS* noundef [[THIS:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.SS* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store %struct.SS* [[TMP0]], %struct.SS** [[TMP1]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SS* noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SS*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 // CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -1250,21 +1293,23 @@ // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store %struct.SS* [[THIS]], %struct.SS** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.SS*, %struct.SS** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load %struct.SS*, %struct.SS** [[TMP1]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[TMP]], align 4 // CHECK11-NEXT: store i32* [[C]], i32** [[_TMP1]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK11-NEXT: store i32 [[INC]], i32* [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[B]], align 4 -// CHECK11-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP3]], -1 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK11-NEXT: store i32 [[INC]], i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4 +// CHECK11-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP5]], -1 // CHECK11-NEXT: store i32 [[DEC]], i32* [[B]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[_TMP1]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP5]], 1 -// CHECK11-NEXT: store i32 [[DIV]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[_TMP1]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP7]], 1 +// CHECK11-NEXT: store i32 [[DIV]], i32* [[TMP6]], align 4 // CHECK11-NEXT: ret void // // @@ -1303,12 +1348,12 @@ // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK11-NEXT: ret void // // @@ -1323,85 +1368,89 @@ // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK11-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l86 // CHECK11-SAME: () #[[ATTR3]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 128 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 128 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: -// CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i32 1 -// CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[T_VAR]], align 128 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[T_VAR]], align 128 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i32 0, i32 0 -// CHECK11-NEXT: store i32 [[TMP0]], i32* [[ARRAYIDX]], align 128 -// CHECK11-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP1:%.*]] = bitcast %struct.S.0* [[ARRAYIDX1]] to i8* -// CHECK11-NEXT: [[TMP2:%.*]] = bitcast %struct.S.0* [[VAR]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 128 [[TMP1]], i8* align 128 [[TMP2]], i32 4, i1 false) -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN2]], i32 2 +// CHECK11-NEXT: store i32 [[TMP1]], i32* [[ARRAYIDX]], align 128 +// CHECK11-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = bitcast %struct.S.1* [[ARRAYIDX1]] to i8* +// CHECK11-NEXT: [[TMP3:%.*]] = bitcast %struct.S.1* [[VAR]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 128 [[TMP2]], i8* align 128 [[TMP3]], i32 4, i1 false) +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN2]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP3]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP4]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK11: arraydestroy.done3: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: store i32 0, i32* [[F]], align 4 // CHECK11-NEXT: ret void // @@ -1441,52 +1490,57 @@ // CHECK11-SAME: (%struct.SST* noundef [[THIS:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.SST*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.SST* [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store %struct.SST* [[TMP0]], %struct.SST** [[TMP1]], align 4 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.SST* noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.SST*, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store %struct.SST* [[THIS]], %struct.SST** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.SST*, %struct.SST** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load %struct.SST*, %struct.SST** [[TMP1]], align 4 // CHECK11-NEXT: store i32* [[A]], i32** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK11-NEXT: store i32 [[INC]], i32* [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK11-NEXT: store i32 [[INC]], i32* [[TMP3]], align 4 // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK11-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK11-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 4 -// CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 4 +// CHECK11-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 4 +// CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/tile_codegen.cpp b/clang/test/OpenMP/tile_codegen.cpp --- a/clang/test/OpenMP/tile_codegen.cpp +++ b/clang/test/OpenMP/tile_codegen.cpp @@ -880,15 +880,17 @@ // CHECK1-LABEL: define {{[^@]+}}@foo6 // CHECK1-SAME: () #[[ATTR0]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 @@ -900,68 +902,70 @@ // CHECK1-NEXT: [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 7, i32* [[I]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 0 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTFLOOR_0_IV_I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTTILE_0_IV_I]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTTILE_0_IV_I]], align 4 // CHECK1-NEXT: br label [[FOR_COND:%.*]] // CHECK1: for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 5 // CHECK1-NEXT: [[CMP3:%.*]] = icmp slt i32 4, [[ADD2]] // CHECK1-NEXT: br i1 [[CMP3]], label [[COND_TRUE4:%.*]], label [[COND_FALSE5:%.*]] // CHECK1: cond.true4: // CHECK1-NEXT: br label [[COND_END7:%.*]] // CHECK1: cond.false5: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 5 // CHECK1-NEXT: br label [[COND_END7]] // CHECK1: cond.end7: // CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 4, [[COND_TRUE4]] ], [ [[ADD6]], [[COND_FALSE5]] ] -// CHECK1-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP9]], [[COND8]] +// CHECK1-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP10]], [[COND8]] // CHECK1-NEXT: br i1 [[CMP9]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] // CHECK1: for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 -// CHECK1-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP12]], 3 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// CHECK1-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP13]], 3 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 7, [[MUL10]] // CHECK1-NEXT: store i32 [[ADD11]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP13]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP14]]) // CHECK1-NEXT: br label [[FOR_INC:%.*]] // CHECK1: for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[INC]], i32* [[DOTTILE_0_IV_I]], align 4 // CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: for.end: @@ -969,14 +973,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // @@ -1874,15 +1878,17 @@ // CHECK2-LABEL: define {{[^@]+}}@foo6 // CHECK2-SAME: () #[[ATTR2]] { // CHECK2-NEXT: entry: -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 @@ -1894,68 +1900,70 @@ // CHECK2-NEXT: [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK2-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // CHECK2-NEXT: store i32 7, i32* [[I]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 0 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 0 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTFLOOR_0_IV_I]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 -// CHECK2-NEXT: store i32 [[TMP8]], i32* [[DOTTILE_0_IV_I]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTTILE_0_IV_I]], align 4 // CHECK2-NEXT: br label [[FOR_COND:%.*]] // CHECK2: for.cond: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 -// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 5 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 5 // CHECK2-NEXT: [[CMP3:%.*]] = icmp slt i32 4, [[ADD2]] // CHECK2-NEXT: br i1 [[CMP3]], label [[COND_TRUE4:%.*]], label [[COND_FALSE5:%.*]] // CHECK2: cond.true4: // CHECK2-NEXT: br label [[COND_END7:%.*]] // CHECK2: cond.false5: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 -// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 5 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 5 // CHECK2-NEXT: br label [[COND_END7]] // CHECK2: cond.end7: // CHECK2-NEXT: [[COND8:%.*]] = phi i32 [ 4, [[COND_TRUE4]] ], [ [[ADD6]], [[COND_FALSE5]] ] -// CHECK2-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP9]], [[COND8]] +// CHECK2-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP10]], [[COND8]] // CHECK2-NEXT: br i1 [[CMP9]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] // CHECK2: for.body: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 -// CHECK2-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP12]], 3 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// CHECK2-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP13]], 3 // CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 7, [[MUL10]] // CHECK2-NEXT: store i32 [[ADD11]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP13]]) +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP14]]) // CHECK2-NEXT: br label [[FOR_INC:%.*]] // CHECK2: for.inc: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 -// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK2-NEXT: store i32 [[INC]], i32* [[DOTTILE_0_IV_I]], align 4 // CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK2: for.end: @@ -1963,14 +1971,14 @@ // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK2-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK2-NEXT: ret void // // diff --git a/clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp b/clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp --- a/clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp +++ b/clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // Check code generation // RUN: %clang_cc1 -no-opaque-pointers -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-llvm %s -o - | FileCheck %s --check-prefix=IR @@ -14,16 +15,23 @@ // IR-LABEL: @func( -// IR-NEXT: [[ENTRY:.*]]: -// IR-NEXT: %[[START_ADDR:.+]] = alloca i32, align 4 -// IR-NEXT: %[[END_ADDR:.+]] = alloca i32, align 4 -// IR-NEXT: %[[STEP_ADDR:.+]] = alloca i32, align 4 -// IR-NEXT: store i32 %[[START:.+]], i32* %[[START_ADDR]], align 4 -// IR-NEXT: store i32 %[[END:.+]], i32* %[[END_ADDR]], align 4 -// IR-NEXT: store i32 %[[STEP:.+]], i32* %[[STEP_ADDR]], align 4 -// IR-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @2, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* %[[END_ADDR]], i32* %[[STEP_ADDR]], i32* %[[START_ADDR]]) +// IR-NEXT: entry: +// IR-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 +// IR-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 +// IR-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4 +// IR-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// IR-NEXT: store i32 [[START:%.*]], i32* [[START_ADDR]], align 4 +// IR-NEXT: store i32 [[END:%.*]], i32* [[END_ADDR]], align 4 +// IR-NEXT: store i32 [[STEP:%.*]], i32* [[STEP_ADDR]], align 4 +// IR-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// IR-NEXT: store i32* [[END_ADDR]], i32** [[TMP0]], align 8 +// IR-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// IR-NEXT: store i32* [[STEP_ADDR]], i32** [[TMP1]], align 8 +// IR-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// IR-NEXT: store i32* [[START_ADDR]], i32** [[TMP2]], align 8 +// IR-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // IR-NEXT: ret void -// IR-NEXT: } +// extern "C" void func(int start, int end, int step) { #pragma omp parallel for #pragma omp unroll partial(7) @@ -32,179 +40,7 @@ } -// IR-LABEL: @.omp_outlined.( -// IR-NEXT: [[ENTRY:.*]]: -// IR-NEXT: %[[DOTGLOBAL_TID__ADDR:.+]] = alloca i32*, align 8 -// IR-NEXT: %[[DOTBOUND_TID__ADDR:.+]] = alloca i32*, align 8 -// IR-NEXT: %[[END_ADDR:.+]] = alloca i32*, align 8 -// IR-NEXT: %[[STEP_ADDR:.+]] = alloca i32*, align 8 -// IR-NEXT: %[[START_ADDR:.+]] = alloca i32*, align 8 -// IR-NEXT: %[[DOTOMP_IV:.+]] = alloca i32, align 4 -// IR-NEXT: %[[TMP:.+]] = alloca i32, align 4 -// IR-NEXT: %[[I:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTCAPTURE_EXPR_:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTCAPTURE_EXPR_1:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTCAPTURE_EXPR_2:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTCAPTURE_EXPR_3:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTCAPTURE_EXPR_6:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTCAPTURE_EXPR_8:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTUNROLLED_IV_I:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTOMP_LB:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTOMP_UB:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTOMP_STRIDE:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTOMP_IS_LAST:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTUNROLLED_IV_I12:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTUNROLL_INNER_IV_I:.+]] = alloca i32, align 4 -// IR-NEXT: store i32* %[[DOTGLOBAL_TID_:.+]], i32** %[[DOTGLOBAL_TID__ADDR]], align 8 -// IR-NEXT: store i32* %[[DOTBOUND_TID_:.+]], i32** %[[DOTBOUND_TID__ADDR]], align 8 -// IR-NEXT: store i32* %[[END:.+]], i32** %[[END_ADDR]], align 8 -// IR-NEXT: store i32* %[[STEP:.+]], i32** %[[STEP_ADDR]], align 8 -// IR-NEXT: store i32* %[[START:.+]], i32** %[[START_ADDR]], align 8 -// IR-NEXT: %[[TMP0:.+]] = load i32*, i32** %[[END_ADDR]], align 8 -// IR-NEXT: %[[TMP1:.+]] = load i32*, i32** %[[STEP_ADDR]], align 8 -// IR-NEXT: %[[TMP2:.+]] = load i32*, i32** %[[START_ADDR]], align 8 -// IR-NEXT: %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4 -// IR-NEXT: store i32 %[[TMP3]], i32* %[[I]], align 4 -// IR-NEXT: %[[TMP4:.+]] = load i32, i32* %[[TMP2]], align 4 -// IR-NEXT: store i32 %[[TMP4]], i32* %[[DOTCAPTURE_EXPR_]], align 4 -// IR-NEXT: %[[TMP5:.+]] = load i32, i32* %[[TMP0]], align 4 -// IR-NEXT: store i32 %[[TMP5]], i32* %[[DOTCAPTURE_EXPR_1]], align 4 -// IR-NEXT: %[[TMP6:.+]] = load i32, i32* %[[TMP1]], align 4 -// IR-NEXT: store i32 %[[TMP6]], i32* %[[DOTCAPTURE_EXPR_2]], align 4 -// IR-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_1]], align 4 -// IR-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4 -// IR-NEXT: %[[SUB:.+]] = sub i32 %[[TMP7]], %[[TMP8]] -// IR-NEXT: %[[SUB4:.+]] = sub i32 %[[SUB]], 1 -// IR-NEXT: %[[TMP9:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4 -// IR-NEXT: %[[ADD:.+]] = add i32 %[[SUB4]], %[[TMP9]] -// IR-NEXT: %[[TMP10:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4 -// IR-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP10]] -// IR-NEXT: %[[SUB5:.+]] = sub i32 %[[DIV]], 1 -// IR-NEXT: store i32 %[[SUB5]], i32* %[[DOTCAPTURE_EXPR_3]], align 4 -// IR-NEXT: %[[TMP11:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4 -// IR-NEXT: %[[ADD7:.+]] = add i32 %[[TMP11]], 1 -// IR-NEXT: store i32 %[[ADD7]], i32* %[[DOTCAPTURE_EXPR_6]], align 4 -// IR-NEXT: %[[TMP12:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_6]], align 4 -// IR-NEXT: %[[SUB9:.+]] = sub i32 %[[TMP12]], -6 -// IR-NEXT: %[[DIV10:.+]] = udiv i32 %[[SUB9]], 7 -// IR-NEXT: %[[SUB11:.+]] = sub i32 %[[DIV10]], 1 -// IR-NEXT: store i32 %[[SUB11]], i32* %[[DOTCAPTURE_EXPR_8]], align 4 -// IR-NEXT: store i32 0, i32* %[[DOTUNROLLED_IV_I]], align 4 -// IR-NEXT: %[[TMP13:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_6]], align 4 -// IR-NEXT: %[[CMP:.+]] = icmp ult i32 0, %[[TMP13]] -// IR-NEXT: br i1 %[[CMP]], label %[[OMP_PRECOND_THEN:.+]], label %[[OMP_PRECOND_END:.+]] -// IR-EMPTY: -// IR-NEXT: [[OMP_PRECOND_THEN]]: -// IR-NEXT: store i32 0, i32* %[[DOTOMP_LB]], align 4 -// IR-NEXT: %[[TMP14:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4 -// IR-NEXT: store i32 %[[TMP14]], i32* %[[DOTOMP_UB]], align 4 -// IR-NEXT: store i32 1, i32* %[[DOTOMP_STRIDE]], align 4 -// IR-NEXT: store i32 0, i32* %[[DOTOMP_IS_LAST]], align 4 -// IR-NEXT: %[[TMP15:.+]] = load i32*, i32** %[[DOTGLOBAL_TID__ADDR]], align 8 -// IR-NEXT: %[[TMP16:.+]] = load i32, i32* %[[TMP15]], align 4 -// IR-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[TMP16]], i32 34, i32* %[[DOTOMP_IS_LAST]], i32* %[[DOTOMP_LB]], i32* %[[DOTOMP_UB]], i32* %[[DOTOMP_STRIDE]], i32 1, i32 1) -// IR-NEXT: %[[TMP17:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4 -// IR-NEXT: %[[TMP18:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4 -// IR-NEXT: %[[CMP13:.+]] = icmp ugt i32 %[[TMP17]], %[[TMP18]] -// IR-NEXT: br i1 %[[CMP13]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// IR-EMPTY: -// IR-NEXT: [[COND_TRUE]]: -// IR-NEXT: %[[TMP19:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4 -// IR-NEXT: br label %[[COND_END:.+]] -// IR-EMPTY: -// IR-NEXT: [[COND_FALSE]]: -// IR-NEXT: %[[TMP20:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4 -// IR-NEXT: br label %[[COND_END]] -// IR-EMPTY: -// IR-NEXT: [[COND_END]]: -// IR-NEXT: %[[COND:.+]] = phi i32 [ %[[TMP19]], %[[COND_TRUE]] ], [ %[[TMP20]], %[[COND_FALSE]] ] -// IR-NEXT: store i32 %[[COND]], i32* %[[DOTOMP_UB]], align 4 -// IR-NEXT: %[[TMP21:.+]] = load i32, i32* %[[DOTOMP_LB]], align 4 -// IR-NEXT: store i32 %[[TMP21]], i32* %[[DOTOMP_IV]], align 4 -// IR-NEXT: br label %[[OMP_INNER_FOR_COND:.+]] -// IR-EMPTY: -// IR-NEXT: [[OMP_INNER_FOR_COND]]: -// IR-NEXT: %[[TMP22:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4 -// IR-NEXT: %[[TMP23:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4 -// IR-NEXT: %[[ADD14:.+]] = add i32 %[[TMP23]], 1 -// IR-NEXT: %[[CMP15:.+]] = icmp ult i32 %[[TMP22]], %[[ADD14]] -// IR-NEXT: br i1 %[[CMP15]], label %[[OMP_INNER_FOR_BODY:.+]], label %[[OMP_INNER_FOR_END:.+]] -// IR-EMPTY: -// IR-NEXT: [[OMP_INNER_FOR_BODY]]: -// IR-NEXT: %[[TMP24:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4 -// IR-NEXT: %[[MUL:.+]] = mul i32 %[[TMP24]], 7 -// IR-NEXT: %[[ADD16:.+]] = add i32 0, %[[MUL]] -// IR-NEXT: store i32 %[[ADD16]], i32* %[[DOTUNROLLED_IV_I12]], align 4 -// IR-NEXT: %[[TMP25:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I12]], align 4 -// IR-NEXT: store i32 %[[TMP25]], i32* %[[DOTUNROLL_INNER_IV_I]], align 4 -// IR-NEXT: br label %[[FOR_COND:.+]] -// IR-EMPTY: -// IR-NEXT: [[FOR_COND]]: -// IR-NEXT: %[[TMP26:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4 -// IR-NEXT: %[[TMP27:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I12]], align 4 -// IR-NEXT: %[[ADD17:.+]] = add i32 %[[TMP27]], 7 -// IR-NEXT: %[[CMP18:.+]] = icmp ult i32 %[[TMP26]], %[[ADD17]] -// IR-NEXT: br i1 %[[CMP18]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]] -// IR-EMPTY: -// IR-NEXT: [[LAND_RHS]]: -// IR-NEXT: %[[TMP28:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4 -// IR-NEXT: %[[TMP29:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4 -// IR-NEXT: %[[ADD19:.+]] = add i32 %[[TMP29]], 1 -// IR-NEXT: %[[CMP20:.+]] = icmp ult i32 %[[TMP28]], %[[ADD19]] -// IR-NEXT: br label %[[LAND_END]] -// IR-EMPTY: -// IR-NEXT: [[LAND_END]]: -// IR-NEXT: %[[TMP30:.+]] = phi i1 [ false, %[[FOR_COND]] ], [ %[[CMP20]], %[[LAND_RHS]] ] -// IR-NEXT: br i1 %[[TMP30]], label %[[FOR_BODY:.+]], label %[[FOR_END:.+]] -// IR-EMPTY: -// IR-NEXT: [[FOR_BODY]]: -// IR-NEXT: %[[TMP31:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4 -// IR-NEXT: %[[TMP32:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4 -// IR-NEXT: %[[TMP33:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4 -// IR-NEXT: %[[MUL21:.+]] = mul i32 %[[TMP32]], %[[TMP33]] -// IR-NEXT: %[[ADD22:.+]] = add i32 %[[TMP31]], %[[MUL21]] -// IR-NEXT: store i32 %[[ADD22]], i32* %[[I]], align 4 -// IR-NEXT: %[[TMP34:.+]] = load i32, i32* %[[TMP2]], align 4 -// IR-NEXT: %[[TMP35:.+]] = load i32, i32* %[[TMP0]], align 4 -// IR-NEXT: %[[TMP36:.+]] = load i32, i32* %[[TMP1]], align 4 -// IR-NEXT: %[[TMP37:.+]] = load i32, i32* %[[I]], align 4 -// IR-NEXT: call void (...) @body(i32 noundef %[[TMP34]], i32 noundef %[[TMP35]], i32 noundef %[[TMP36]], i32 noundef %[[TMP37]]) -// IR-NEXT: br label %[[FOR_INC:.+]] -// IR-EMPTY: -// IR-NEXT: [[FOR_INC]]: -// IR-NEXT: %[[TMP38:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4 -// IR-NEXT: %[[INC:.+]] = add i32 %[[TMP38]], 1 -// IR-NEXT: store i32 %[[INC]], i32* %[[DOTUNROLL_INNER_IV_I]], align 4 -// IR-NEXT: br label %[[FOR_COND]], !llvm.loop ![[LOOP2:[0-9]+]] -// IR-EMPTY: -// IR-NEXT: [[FOR_END]]: -// IR-NEXT: br label %[[OMP_BODY_CONTINUE:.+]] -// IR-EMPTY: -// IR-NEXT: [[OMP_BODY_CONTINUE]]: -// IR-NEXT: br label %[[OMP_INNER_FOR_INC:.+]] -// IR-EMPTY: -// IR-NEXT: [[OMP_INNER_FOR_INC]]: -// IR-NEXT: %[[TMP39:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4 -// IR-NEXT: %[[ADD23:.+]] = add i32 %[[TMP39]], 1 -// IR-NEXT: store i32 %[[ADD23]], i32* %[[DOTOMP_IV]], align 4 -// IR-NEXT: br label %[[OMP_INNER_FOR_COND]] -// IR-EMPTY: -// IR-NEXT: [[OMP_INNER_FOR_END]]: -// IR-NEXT: br label %[[OMP_LOOP_EXIT:.+]] -// IR-EMPTY: -// IR-NEXT: [[OMP_LOOP_EXIT]]: -// IR-NEXT: %[[TMP40:.+]] = load i32*, i32** %[[DOTGLOBAL_TID__ADDR]], align 8 -// IR-NEXT: %[[TMP41:.+]] = load i32, i32* %[[TMP40]], align 4 -// IR-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[TMP41]]) -// IR-NEXT: br label %[[OMP_PRECOND_END]] -// IR-EMPTY: -// IR-NEXT: [[OMP_PRECOND_END]]: -// IR-NEXT: ret void -// IR-NEXT: } #endif /* HEADER */ -// IR: ![[LOOP2]] = distinct !{![[LOOP2]], ![[LOOPPROP3:[0-9]+]], ![[LOOPPROP4:[0-9]+]]} -// IR: ![[LOOPPROP3]] = !{!"llvm.loop.mustprogress"} -// IR: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.count", i32 7} diff --git a/clang/test/OpenMP/vla_crash.c b/clang/test/OpenMP/vla_crash.c --- a/clang/test/OpenMP/vla_crash.c +++ b/clang/test/OpenMP/vla_crash.c @@ -27,6 +27,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[B:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[C:%.*]] = alloca i32***, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) @@ -34,51 +35,58 @@ // CHECK1-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* @a, align 4 // CHECK1-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i32** [[B]], i32*** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i32**** [[C]], i32***** [[TMP8]], align 8 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]], i32** [[B]], i64 [[TMP4]], i32**** [[C]]) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[B:%.*]], i64 noundef [[VLA1:%.*]], i32**** noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i32**, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i32****, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i32** [[B]], i32*** [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store i32**** [[C]], i32***** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32****, i32***** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32***, i32**** [[TMP3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32**, i32*** [[TMP4]], i64 0 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32**, i32*** [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* @a, align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP2]] -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32*, i32** [[TMP5]], i64 [[TMP7]] -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32*, i32** [[ARRAYIDX3]], i64 0 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[ARRAYIDX4]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* @a, align 4 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i64 [[IDXPROM5]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = mul nsw i64 0, [[TMP0]] -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i64 [[TMP12]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX7]], i64 0 -// CHECK1-NEXT: store i32 [[TMP10]], i32* [[ARRAYIDX8]], align 4 +// CHECK1-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32****, i32***** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32***, i32**** [[TMP8]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32**, i32*** [[TMP9]], i64 0 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32**, i32*** [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* @a, align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP6]] +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32*, i32** [[TMP10]], i64 [[TMP12]] +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32*, i32** [[ARRAYIDX1]], i64 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* @a, align 4 +// CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i64 [[IDXPROM3]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX4]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = mul nsw i64 0, [[TMP2]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[TMP16]], i64 [[TMP17]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX5]], i64 0 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX6]], align 4 // CHECK1-NEXT: ret void // // @@ -88,6 +96,7 @@ // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[P:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -97,33 +106,39 @@ // CHECK1-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 // CHECK1-NEXT: [[TMP3:%.*]] = bitcast i32** [[A_ADDR]] to i32* // CHECK1-NEXT: store i32* [[TMP3]], i32** [[P]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i32** [[P]], i32*** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i32** [[A_ADDR]], i32*** [[TMP6]], align 8 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]], i32** [[P]], i32** [[A_ADDR]]) #[[ATTR2]] +// CHECK1-NEXT: call void @.omp_outlined..1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[P:%.*]], i32** noundef nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[P_ADDR:%.*]] = alloca i32**, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32**, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i32** [[P]], i32*** [[P_ADDR]], align 8 -// CHECK1-NEXT: store i32** [[A]], i32*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32**, i32*** [[P_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32**, i32*** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32** [[TMP2]] to i32* -// CHECK1-NEXT: [[CMP:%.*]] = icmp eq i32* [[TMP3]], [[TMP4]] +// CHECK1-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32**, i32*** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i32** [[TMP6]] to i32* +// CHECK1-NEXT: [[CMP:%.*]] = icmp eq i32* [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] // CHECK1: if.then: // CHECK1-NEXT: br label [[IF_END]] diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -439,7 +439,7 @@ __OMP_RTL(__kmpc_target_deinit, false, Void, IdentPtr, Int8, Int1) __OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr) __OMP_RTL(__kmpc_parallel_51, false, Void, IdentPtr, Int32, Int32, Int32, Int32, - VoidPtr, VoidPtr, VoidPtrPtr, SizeTy) + VoidPtr, VoidPtr, VoidPtr) __OMP_RTL(__kmpc_kernel_parallel, false, Int1, VoidPtrPtr) __OMP_RTL(__kmpc_kernel_end_parallel, false, Void, ) __OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32) @@ -456,9 +456,8 @@ __OMP_RTL(__kmpc_alloc_shared, false, VoidPtr, SizeTy) __OMP_RTL(__kmpc_free_shared, false, Void, VoidPtr, SizeTy) -__OMP_RTL(__kmpc_begin_sharing_variables, false, Void, VoidPtrPtrPtr, SizeTy) -__OMP_RTL(__kmpc_end_sharing_variables, false, Void, ) -__OMP_RTL(__kmpc_get_shared_variables, false, Void, VoidPtrPtrPtr) +__OMP_RTL(__kmpc_alloc_aggregate_arg, false, VoidPtr, VoidPtr, VoidPtr) +__OMP_RTL(__kmpc_get_shared_variables_aggregate, false, Void, VoidPtrPtr) __OMP_RTL(__kmpc_parallel_level, false, Int8, ) __OMP_RTL(__kmpc_is_spmd_exec_mode, false, Int8, ) __OMP_RTL(__kmpc_barrier_simple_spmd, false, Void, IdentPtr, Int32) @@ -918,6 +917,9 @@ __OMP_RTL_ATTRS(__kmpc_free_shared, DeviceAllocAttrs, AttributeSet(), ParamAttrs(NoCaptureAttrs)) +__OMP_RTL_ATTRS(__kmpc_alloc_aggregate_arg, DefaultAttrs, ReturnPtrAttrs, + ParamAttrs()) + __OMP_RTL_ATTRS(__kmpc_alloc, DefaultAttrs, ReturnPtrAttrs, ParamAttrs()) __OMP_RTL_ATTRS(__kmpc_aligned_alloc, DefaultAttrs, ReturnPtrAttrs, ParamAttrs()) diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -4260,6 +4260,7 @@ case OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2: case OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2: case OMPRTL___kmpc_nvptx_end_reduce_nowait: + case OMPRTL___kmpc_alloc_aggregate_arg: break; case OMPRTL___kmpc_distribute_static_init_4: case OMPRTL___kmpc_distribute_static_init_4u: diff --git a/openmp/libomptarget/DeviceRTL/include/Interface.h b/openmp/libomptarget/DeviceRTL/include/Interface.h --- a/openmp/libomptarget/DeviceRTL/include/Interface.h +++ b/openmp/libomptarget/DeviceRTL/include/Interface.h @@ -182,25 +182,22 @@ /// memory configured at launch. void *__kmpc_get_dynamic_shared(); -/// Allocate sufficient space for \p NumArgs sequential `void*` and store the -/// allocation address in \p GlobalArgs. -/// -/// Called by the main thread prior to a parallel region. -/// -/// We also remember it in GlobalArgsPtr to ensure the worker threads and -/// deallocation function know the allocation address too. -void __kmpc_begin_sharing_variables(void ***GlobalArgs, uint64_t NumArgs); - -/// Deallocate the memory allocated by __kmpc_begin_sharing_variables. -/// -/// Called by the main thread after a parallel region. -void __kmpc_end_sharing_variables(); +/// Return a pointer in memory to store the aggregate argument of an outlined +/// parallel region, either using a local memory allocation when in SPMD mode +/// expected in \p LocalPtr, or from shared memory, expected in \p GlobalPtr. +void *__kmpc_alloc_aggregate_arg(void *LocalPtr, void *GlobalPtr); -/// Store the allocation address obtained via __kmpc_begin_sharing_variables in +/// Store the pointer of the struct aggregating shared variables in /// \p GlobalArgs. /// /// Called by the worker threads in the parallel region (function). -void __kmpc_get_shared_variables(void ***GlobalArgs); +void __kmpc_get_shared_variables_aggregate(void **GlobalArgs); + +/// Store the pointer of the struct aggregating shared variables in \p args +/// to team-shared memory for worker threads to access. +/// +/// Called by the main thread to initiate parallel region execution. +void __kmpc_set_shared_variables_aggregate(void *args); /// External interface to get the thread ID. uint32_t __kmpc_get_hardware_thread_id_in_block(); diff --git a/openmp/libomptarget/DeviceRTL/include/generated_microtask_cases.gen b/openmp/libomptarget/DeviceRTL/include/generated_microtask_cases.gen deleted file mode 100644 --- a/openmp/libomptarget/DeviceRTL/include/generated_microtask_cases.gen +++ /dev/null @@ -1,405 +0,0 @@ -case 0: -((void (*)(int32_t *, int32_t * -))fn)(&global_tid, &bound_tid -); -break; -case 1: -((void (*)(int32_t *, int32_t * -, void *))fn)(&global_tid, &bound_tid -, args[0]); -break; -case 2: -((void (*)(int32_t *, int32_t * -, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1]); -break; -case 3: -((void (*)(int32_t *, int32_t * -, void *, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2]); -break; -case 4: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -); -break; -case 5: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4]); -break; -case 6: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5]); -break; -case 7: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6]); -break; -case 8: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -); -break; -case 9: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8]); -break; -case 10: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9]); -break; -case 11: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10]); -break; -case 12: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -); -break; -case 13: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12]); -break; -case 14: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13]); -break; -case 15: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14]); -break; -case 16: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -); -break; -case 17: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16]); -break; -case 18: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17]); -break; -case 19: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18]); -break; -case 20: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -); -break; -case 21: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20]); -break; -case 22: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20], args[21]); -break; -case 23: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20], args[21], args[22]); -break; -case 24: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20], args[21], args[22], args[23] -); -break; -case 25: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20], args[21], args[22], args[23] -, args[24]); -break; -case 26: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20], args[21], args[22], args[23] -, args[24], args[25]); -break; -case 27: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20], args[21], args[22], args[23] -, args[24], args[25], args[26]); -break; -case 28: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20], args[21], args[22], args[23] -, args[24], args[25], args[26], args[27] -); -break; -case 29: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20], args[21], args[22], args[23] -, args[24], args[25], args[26], args[27] -, args[28]); -break; -case 30: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20], args[21], args[22], args[23] -, args[24], args[25], args[26], args[27] -, args[28], args[29]); -break; -case 31: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20], args[21], args[22], args[23] -, args[24], args[25], args[26], args[27] -, args[28], args[29], args[30]); -break; -case 32: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20], args[21], args[22], args[23] -, args[24], args[25], args[26], args[27] -, args[28], args[29], args[30], args[31] -); -break; diff --git a/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp b/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp --- a/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp @@ -63,16 +63,11 @@ return NumThreads; } -// Invoke an outlined parallel function unwrapping arguments (up to 32). +// Invoke an outlined parallel function. void invokeMicrotask(int32_t global_tid, int32_t bound_tid, void *fn, - void **args, int64_t nargs) { + void *args) { DebugEntryRAII Entry(__FILE__, __LINE__, ""); - switch (nargs) { -#include "generated_microtask_cases.gen" - default: - PRINT("Too many arguments in kmp_invoke_microtask, aborting execution.\n"); - __builtin_trap(); - } + ((void (*)(int32_t *, int32_t *, void *))fn)(&global_tid, &bound_tid, args); } } // namespace @@ -81,7 +76,7 @@ void __kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr, int32_t num_threads, int proc_bind, void *fn, - void *wrapper_fn, void **args, int64_t nargs) { + void *wrapper_fn, void *args) { FunctionTracingRAII(); uint32_t TId = mapping::getThreadIdInBlock(); @@ -89,7 +84,8 @@ if (OMP_UNLIKELY(!if_expr || icv::Level)) { state::DateEnvironmentRAII DERAII(ident); ++icv::Level; - invokeMicrotask(TId, 0, fn, args, nargs); + invokeMicrotask(TId, 0, fn, args); + state::exitDataEnvironment(); return; } @@ -117,7 +113,7 @@ ASSERT(icv::Level == 1u); if (TId < NumThreads) - invokeMicrotask(TId, 0, fn, args, nargs); + invokeMicrotask(TId, 0, fn, args); // Synchronize all threads at the end of a parallel region. synchronize::threadsAligned(); @@ -143,70 +139,11 @@ bool IsActiveParallelRegion = NumThreads > 1; if (!IsActiveParallelRegion) { state::ValueRAII LevelRAII(icv::Level, 1u, 0u, true, ident); - invokeMicrotask(TId, 0, fn, args, nargs); + invokeMicrotask(TId, 0, fn, args); return; } - void **GlobalArgs = nullptr; - if (nargs) { - __kmpc_begin_sharing_variables(&GlobalArgs, nargs); - switch (nargs) { - default: - for (int I = 0; I < nargs; I++) - GlobalArgs[I] = args[I]; - break; - case 16: - GlobalArgs[15] = args[15]; - [[fallthrough]]; - case 15: - GlobalArgs[14] = args[14]; - [[fallthrough]]; - case 14: - GlobalArgs[13] = args[13]; - [[fallthrough]]; - case 13: - GlobalArgs[12] = args[12]; - [[fallthrough]]; - case 12: - GlobalArgs[11] = args[11]; - [[fallthrough]]; - case 11: - GlobalArgs[10] = args[10]; - [[fallthrough]]; - case 10: - GlobalArgs[9] = args[9]; - [[fallthrough]]; - case 9: - GlobalArgs[8] = args[8]; - [[fallthrough]]; - case 8: - GlobalArgs[7] = args[7]; - [[fallthrough]]; - case 7: - GlobalArgs[6] = args[6]; - [[fallthrough]]; - case 6: - GlobalArgs[5] = args[5]; - [[fallthrough]]; - case 5: - GlobalArgs[4] = args[4]; - [[fallthrough]]; - case 4: - GlobalArgs[3] = args[3]; - [[fallthrough]]; - case 3: - GlobalArgs[2] = args[2]; - [[fallthrough]]; - case 2: - GlobalArgs[1] = args[1]; - [[fallthrough]]; - case 1: - GlobalArgs[0] = args[0]; - [[fallthrough]]; - case 0: - break; - } - } + __kmpc_set_shared_variables_aggregate(args); { // Note that the order here is important. `icv::Level` has to be updated @@ -224,9 +161,6 @@ // Master waits for workers to signal. synchronize::threads(); } - - if (nargs) - __kmpc_end_sharing_variables(); } __attribute__((noinline)) bool diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp --- a/openmp/libomptarget/DeviceRTL/src/State.cpp +++ b/openmp/libomptarget/DeviceRTL/src/State.cpp @@ -532,41 +532,21 @@ void *llvm_omp_get_dynamic_shared() { return __kmpc_get_dynamic_shared(); } -/// Allocate storage in shared memory to communicate arguments from the main -/// thread to the workers in generic mode. If we exceed -/// NUM_SHARED_VARIABLES_IN_SHARED_MEM we will malloc space for communication. -constexpr uint64_t NUM_SHARED_VARIABLES_IN_SHARED_MEM = 64; - -[[clang::loader_uninitialized]] static void - *SharedMemVariableSharingSpace[NUM_SHARED_VARIABLES_IN_SHARED_MEM]; -#pragma omp allocate(SharedMemVariableSharingSpace) \ - allocator(omp_pteam_mem_alloc) -[[clang::loader_uninitialized]] static void **SharedMemVariableSharingSpacePtr; -#pragma omp allocate(SharedMemVariableSharingSpacePtr) \ - allocator(omp_pteam_mem_alloc) - -void __kmpc_begin_sharing_variables(void ***GlobalArgs, uint64_t nArgs) { - FunctionTracingRAII(); - if (nArgs <= NUM_SHARED_VARIABLES_IN_SHARED_MEM) { - SharedMemVariableSharingSpacePtr = &SharedMemVariableSharingSpace[0]; - } else { - SharedMemVariableSharingSpacePtr = (void **)memory::allocGlobal( - nArgs * sizeof(void *), "new extended args"); - ASSERT(SharedMemVariableSharingSpacePtr != nullptr && - "Nullptr returned by malloc!"); - } - *GlobalArgs = SharedMemVariableSharingSpacePtr; +void *__kmpc_alloc_aggregate_arg(void *LocalPtr, void *GlobalPtr) { + return (__kmpc_is_spmd_exec_mode() ? LocalPtr : GlobalPtr); } -void __kmpc_end_sharing_variables() { - FunctionTracingRAII(); - if (SharedMemVariableSharingSpacePtr != &SharedMemVariableSharingSpace[0]) - memory::freeGlobal(SharedMemVariableSharingSpacePtr, "new extended args"); +/// Allocate storage in shared memory to communicate the struct aggregating +/// arguments from the main thread to the workers in generic mode. +[[clang::loader_uninitialized]] static void *SharedMemAggregatePtr[1]; +#pragma omp allocate(SharedMemAggregatePtr) allocator(omp_pteam_mem_alloc) + +void __kmpc_get_shared_variables_aggregate(void **GlobalArgs) { + *GlobalArgs = SharedMemAggregatePtr[0]; } -void __kmpc_get_shared_variables(void ***GlobalArgs) { - FunctionTracingRAII(); - *GlobalArgs = SharedMemVariableSharingSpacePtr; +void __kmpc_set_shared_variables_aggregate(void *args) { + SharedMemAggregatePtr[0] = args; } } #pragma omp end declare target diff --git a/openmp/libomptarget/utils/generate_microtask_cases.py b/openmp/libomptarget/utils/generate_microtask_cases.py deleted file mode 100755 --- a/openmp/libomptarget/utils/generate_microtask_cases.py +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env python3 - -import argparse - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument('--max_args', type=int, help='Max number of arguments to generate case statements for', required=True) - parser.add_argument('--output', help='Output header file to include', required=True) - args = parser.parse_args() - - output='' - for i in range(args.max_args+1): - output += 'case %d:\n'%(i) - output += '((void (*)(kmp_int32 *, kmp_int32 *\n' - for j in range(i): - output += ', void *' - if (j+1)%4 == 0: - output += '\n' - output += '))fn)(&global_tid, &bound_tid\n' - for j in range(i): - output += ', args[%d]'%(j) - if (j+1)%4 == 0: - output += '\n' - output += ');\n' - output += 'break;\n' - - with open(args.output, 'w') as f: - print(output, file=f) - -if __name__ == "__main__": - main()